diff --git a/requirements.txt b/requirements.txt index 3423ded57..7803ea04d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,5 @@ redis==2.10.6 django-ipware smashrun-client>=0.6.0 beautifulsoup4 +google-api-python-client +oauth2client diff --git a/tapiriik/local_settings.py.example b/tapiriik/local_settings.py.example index 28f4c337b..e220028c4 100644 --- a/tapiriik/local_settings.py.example +++ b/tapiriik/local_settings.py.example @@ -55,6 +55,9 @@ SMASHRUN_CLIENT_SECRET = "####" SPORTTRACKS_CLIENT_ID = "####" SPORTTRACKS_CLIENT_SECRET = "####" +GOOGLEDRIVE_CLIENT_ID = "####" +GOOGLEDRIVE_CLIENT_SECRET = "####" + STRAVA_CLIENT_SECRET = "####" STRAVA_CLIENT_ID = "####" STRAVA_RATE_LIMITS = [] diff --git a/tapiriik/services/Dropbox/dropbox.py b/tapiriik/services/Dropbox/dropbox.py index 2c53e68e9..17ff9efc1 100644 --- a/tapiriik/services/Dropbox/dropbox.py +++ b/tapiriik/services/Dropbox/dropbox.py @@ -1,66 +1,29 @@ -from datetime import datetime, timedelta -from django.core.urlresolvers import reverse -from tapiriik.database import cachedb -from tapiriik.services.api import APIException, ServiceExceptionScope, UserException, UserExceptionType, APIExcludeActivity, ServiceException -from tapiriik.services.exception_tools import strip_context -from tapiriik.services.gpx import GPXIO -from tapiriik.services.interchange import ActivityType, UploadedActivity -from tapiriik.services.service_base import ServiceAuthenticationType, ServiceBase -from tapiriik.services.tcx import TCXIO from tapiriik.settings import WEB_ROOT, DROPBOX_APP_KEY, DROPBOX_APP_SECRET, DROPBOX_FULL_APP_KEY, DROPBOX_FULL_APP_SECRET -import bson +from tapiriik.services.service_base import ServiceAuthenticationType +from tapiriik.services.storage_service_base import StorageServiceBase +from tapiriik.services.api import APIException, UserException, UserExceptionType +from tapiriik.database import cachedb, redis +from django.core.urlresolvers import reverse +from datetime import timedelta import dropbox import json import logging -import lxml import pickle -import re import requests logger = logging.getLogger(__name__) -class DropboxService(ServiceBase): + +class DropboxService(StorageServiceBase): ID = "dropbox" DisplayName = "Dropbox" DisplayAbbreviation = "DB" AuthenticationType = ServiceAuthenticationType.OAuth AuthenticationNoFrame = True # damn dropbox, spoiling my slick UI Configurable = True - ReceivesStationaryActivities = False - - ActivityTaggingTable = { # earlier items have precedence over - ActivityType.Running: "run(?!tastic)", - ActivityType.MountainBiking: "m(oun)?t(ai)?n\s*bik(e|ing)", - ActivityType.Cycling: "(cycl(e|ing)|bik(e|ing))", - ActivityType.Walking: "walk", - ActivityType.Hiking: "hik(e|ing)", - ActivityType.DownhillSkiing: "(downhill|down(hill)?\s*ski(ing)?)", - ActivityType.CrossCountrySkiing: "(xc|cross.*country)\s*ski(ing)?", - ActivityType.Snowboarding: "snowboard(ing)?", - ActivityType.Skating: "skat(e|ing)?", - ActivityType.Swimming: "swim", - ActivityType.Wheelchair: "wheelchair", - ActivityType.Rowing: "row", - ActivityType.Elliptical: "elliptical", - ActivityType.RollerSkiing: "rollerskiing", - ActivityType.StrengthTraining: "strength( ?training)?", - ActivityType.Gym: "(gym|workout)", - ActivityType.Climbing: "climb(ing)?", - ActivityType.StandUpPaddling: "(sup|stand( |-)/up ?paddl(e|ing))", - ActivityType.Other: "(other|unknown)" - } - ConfigurationDefaults = {"SyncRoot": "/", "UploadUntagged": False, "Format":"tcx", "Filename":"%Y-%m-%d_%H-%M-%S_#NAME_#TYPE"} - SupportsHR 
= SupportsCadence = True + ConfigurationDefaults = {"SyncRoot": "/", "UploadUntagged": False, "Format": "tcx", "Filename":"%Y-%m-%d_%H-%M-%S_#NAME_#TYPE"} - SupportedActivities = ActivityTaggingTable.keys() - - def _app_credentials(self, full): - if full: - return (DROPBOX_FULL_APP_KEY, DROPBOX_FULL_APP_SECRET) - else: - return (DROPBOX_APP_KEY, DROPBOX_APP_SECRET) - - def _getClient(self, serviceRec): + def GetClient(self, serviceRec): from tapiriik.services import Service if "Secret" in serviceRec.Authorization: # Upgrade OAuth v1 token to v2. @@ -84,7 +47,7 @@ def _getClient(self, serviceRec): return dropbox.Dropbox(token) def WebInit(self): - self.UserAuthorizationURL = reverse("oauth_redirect", kwargs={"service": "dropbox"}) + self.UserAuthorizationURL = reverse("oauth_redirect", kwargs={"service": self.ID}) def RequiresConfiguration(self, svcRec): return svcRec.Authorization["Full"] and ("SyncRoot" not in svcRec.Config or not len(svcRec.Config["SyncRoot"])) @@ -107,6 +70,7 @@ def RetrieveAuthorizationToken(self, req, level): uid = int(result.user_id) return (uid, {"Token": result.access_token, "Full": full}) + def RevokeAuthorization(self, serviceRecord): pass # :( @@ -126,65 +90,18 @@ def _raiseDbException(self, e): raise APIException("Dropbox quota error", block=True, user_exception=UserException(UserExceptionType.AccountFull, intervention_required=True)) raise APIException("API failure - %s" % e) - def _tagActivity(self, text): - for act, pattern in self.ActivityTaggingTable.items(): - if re.search(pattern, text, re.IGNORECASE): - return act - return None - - def _getActivity(self, serviceRecord, dbcl, path, base_activity=None): - try: - metadata, file = dbcl.files_download(path) - except dropbox.exceptions.DropboxException as e: - self._raiseDbException(e) - - try: - if path.lower().endswith(".tcx"): - act = TCXIO.Parse(file.content, base_activity) - else: - act = GPXIO.Parse(file.content, base_activity) - except ValueError as e: - raise APIExcludeActivity("Invalid GPX/TCX " + str(e), activity_id=path, user_exception=UserException(UserExceptionType.Corrupt)) - except lxml.etree.XMLSyntaxError as e: - raise APIExcludeActivity("LXML parse error " + str(e), activity_id=path, user_exception=UserException(UserExceptionType.Corrupt)) - return act, metadata.rev - - def DownloadActivityList(self, svcRec, exhaustive=False): - dbcl = self._getClient(svcRec) - if not svcRec.Authorization["Full"]: - syncRoot = "/" - else: - syncRoot = svcRec.Config["SyncRoot"] + def EnumerateFiles(self, svcRec, dbcl, root, cache): # Dropbox API v2 doesn't like / as root. - if syncRoot == "/": - syncRoot = "" + if root == "/": + root = "" # New Dropbox API prefers path_lower, it would seem. - syncRoot = syncRoot.lower() - - # There used to be a massive affair going on here to cache the folder structure locally. - # Dropbox API 2.0 doesn't support the hashes I need for that. - # Oh well. Throw that data out now. Well, don't load it at all. 
-        cache = cachedb.dropbox_cache.find_one({"ExternalID": svcRec.ExternalID}, {"ExternalID": True, "Activities": True})
-        if cache is None:
-            cache = {"ExternalID": svcRec.ExternalID, "Activities": {}}
+        root = root.lower()

         try:
-            list_result = dbcl.files_list_folder(syncRoot, recursive=True)
+            list_result = dbcl.files_list_folder(root, recursive=True)
         except dropbox.exceptions.DropboxException as e:
             self._raiseDbException(e)

-        def cache_writeback():
-            if "_id" in cache:
-                cachedb.dropbox_cache.save(cache)
-            else:
-                insert_result = cachedb.dropbox_cache.insert(cache)
-                cache["_id"] = insert_result.inserted_id
-
-
-        activities = []
-        exclusions = []
-        discovered_activity_cache_keys = set()
-
         while True:
             for entry in list_result.entries:
                 if not hasattr(entry, "rev"):
@@ -197,143 +114,42 @@ def cache_writeback():
                     continue

                 if svcRec.Authorization["Full"]:
-                    relPath = path.replace(syncRoot, "", 1)
+                    relPath = path.replace(root, "", 1)
                 else:
                     relPath = path.replace("/Apps/tapiriik/", "", 1) # dropbox api is meh api

-                hashedRelPath = self._hash_path(relPath)
-                discovered_activity_cache_keys.add(hashedRelPath)
-                if hashedRelPath in cache["Activities"]:
-                    existing = cache["Activities"][hashedRelPath]
-                else:
-                    existing = None
-
-                if existing and existing["Rev"] == entry.rev:
-                    # don't need entire activity loaded here, just UID
-                    act = UploadedActivity()
-                    act.UID = existing["UID"]
-                    try:
-                        act.StartTime = datetime.strptime(existing["StartTime"], "%H:%M:%S %d %m %Y %z")
-                    except:
-                        act.StartTime = datetime.strptime(existing["StartTime"], "%H:%M:%S %d %m %Y") # Exactly one user has managed to break %z :S
-                    if "EndTime" in existing: # some cached activities may not have this, it is not essential
-                        act.EndTime = datetime.strptime(existing["EndTime"], "%H:%M:%S %d %m %Y %z")
-                else:
-                    logger.debug("Retrieving %s (%s)" % (path, "outdated meta cache" if existing else "not in meta cache"))
-                    # get the full activity
-                    try:
-                        act, rev = self._getActivity(svcRec, dbcl, path)
-                    except APIExcludeActivity as e:
-                        logger.info("Encountered APIExcludeActivity %s" % str(e))
-                        exclusions.append(strip_context(e))
-                        continue
-
-                    try:
-                        act.EnsureTZ()
-                    except:
-                        pass # We tried.
-
-                    act.Laps = [] # Yeah, I'll process the activity twice, but at this point CPU time is more plentiful than RAM.
-                    cache["Activities"][hashedRelPath] = {"Rev": rev, "UID": act.UID, "StartTime": act.StartTime.strftime("%H:%M:%S %d %m %Y %z"), "EndTime": act.EndTime.strftime("%H:%M:%S %d %m %Y %z")}
-                    # Incrementally update the cache db.
-                    # Otherwise, if we crash later on in listing
-                    # (due to OOM or similar), we'll never make progress on this account.
-                    cache_writeback()
-                tagRes = self._tagActivity(relPath)
-                act.ServiceData = {"Path": path, "Tagged": tagRes is not None}
-
-                act.Type = tagRes if tagRes is not None else ActivityType.Other
-
-                logger.debug("Activity s/t %s" % act.StartTime)
-
-                activities.append(act)
-
+                yield (path, relPath, path, entry.rev)
             # Perform pagination.
             if list_result.has_more:
                 list_result = dbcl.files_list_folder_continue(list_result.cursor)
             else:
                 break

-        # Drop deleted activities' records from cache.
- all_activity_cache_keys = set(cache["Activities"].keys()) - for deleted_key in all_activity_cache_keys - discovered_activity_cache_keys: - del cache["Activities"][deleted_key] - - cache_writeback() - return activities, exclusions - - def DownloadActivity(self, serviceRecord, activity): - # activity might not be populated at this point, still possible to bail out - if not activity.ServiceData["Tagged"]: - if not (hasattr(serviceRecord, "Config") and "UploadUntagged" in serviceRecord.Config and serviceRecord.Config["UploadUntagged"]): - raise APIExcludeActivity("Activity untagged", permanent=False, activity_id=activity.ServiceData["Path"], user_exception=UserException(UserExceptionType.Untagged)) - - path = activity.ServiceData["Path"] - dbcl = self._getClient(serviceRecord) - activity, rev = self._getActivity(serviceRecord, dbcl, path, base_activity=activity) - - # Dropbox doesn't support stationary activities yet. - if activity.CountTotalWaypoints() <= 1: - raise APIExcludeActivity("Too few waypoints", activity_id=path, user_exception=UserException(UserExceptionType.Corrupt)) - - return activity - - def _hash_path(self, path): - import hashlib - # Can't use the raw file path as a dict key in Mongo, since who knows what'll be in it (periods especially) - # Used the activity UID for the longest time, but that causes inefficiency when >1 file represents the same activity - # So, this: - csp = hashlib.new("md5") - csp.update(path.encode('utf-8')) - return csp.hexdigest() - - def _clean_activity_name(self, name): - # https://www.dropbox.com/help/145/en - # Nothing outside BMP is allowed, either, apparently. - return re.sub("[@><:\"|?*]|[^\U00000000-\U0000d7ff\U0000e000-\U0000ffff]", "", re.sub("[/\\\]", "-", name)) - - def _format_file_name(self, format, activity): - name_pattern = re.compile("#NAME", re.IGNORECASE) - type_pattern = re.compile("#TYPE", re.IGNORECASE) - name = activity.StartTime.strftime(format) - name = name_pattern.sub(self._clean_activity_name(activity.Name) if activity.Name and len(activity.Name) > 0 and activity.Name.lower() != activity.Type.lower() else "", name) - name = type_pattern.sub(activity.Type, name) - name = re.sub(r"([\W_])\1+", r"\1", name) # To handle cases where the activity is unnamed - name = re.sub(r"^([\W_])|([\W_])$", "", name) # To deal with trailing-seperator weirdness (repeated seperator handled by prev regexp) - return name - - def UploadActivity(self, serviceRecord, activity): - format = serviceRecord.GetConfiguration()["Format"] - if format == "tcx": - if "tcx" in activity.PrerenderedFormats: - logger.debug("Using prerendered TCX") - data = activity.PrerenderedFormats["tcx"] - else: - data = TCXIO.Dump(activity) - else: - if "gpx" in activity.PrerenderedFormats: - logger.debug("Using prerendered GPX") - data = activity.PrerenderedFormats["gpx"] - else: - data = GPXIO.Dump(activity) - - dbcl = self._getClient(serviceRecord) - fname = self._format_file_name(serviceRecord.GetConfiguration()["Filename"], activity)[:250] + "." 
+ format # DB has a max path component length of 255 chars, and we have to save for the file ext (4) and the leading slash (1)

+    def GetFileContents(self, serviceRecord, dbcl, path, storageid, cache):
+        try:
+            metadata, file = dbcl.files_download(path)
+        except dropbox.exceptions.DropboxException as e:
+            self._raiseDbException(e)

-        if not serviceRecord.Authorization["Full"]:
-            fpath = "/" + fname
-        else:
-            fpath = serviceRecord.Config["SyncRoot"] + "/" + fname
+        return file.content, metadata.rev

+    def PutFileContents(self, serviceRecord, dbcl, path, contents, cache):
         try:
-            metadata = dbcl.files_upload(data.encode("UTF-8"), fpath, mode=dropbox.files.WriteMode.overwrite)
+            metadata = dbcl.files_upload(contents, path, mode=dropbox.files.WriteMode.overwrite)
         except dropbox.exceptions.DropboxException as e:
             self._raiseDbException(e)

-        # Fake this in so we don't immediately redownload the activity next time 'round
-        cache = cachedb.dropbox_cache.find_one({"ExternalID": serviceRecord.ExternalID})
-        cache["Activities"][self._hash_path("/" + fname)] = {"Rev": metadata.rev, "UID": activity.UID, "StartTime": activity.StartTime.strftime("%H:%M:%S %d %m %Y %z"), "EndTime": activity.EndTime.strftime("%H:%M:%S %d %m %Y %z")}
-        cachedb.dropbox_cache.update({"ExternalID": serviceRecord.ExternalID}, cache) # not upsert, hope the record exists at this time...
-        return fpath

-    def DeleteCachedData(self, serviceRecord):
-        cachedb.dropbox_cache.remove({"ExternalID": serviceRecord.ExternalID})
+        return metadata.rev
+
+    def MoveFile(self, serviceRecord, dbcl, path, destPath, cache):
+        dbcl.files_move(path, destPath)
+
+    def ServiceCacheDB(self):
+        return cachedb.dropbox_cache
+
+    def SyncRoot(self, svcRec):
+        if not svcRec.Authorization["Full"]:
+            syncRoot = "/"
+        else:
+            syncRoot = svcRec.Config["SyncRoot"]
+        return syncRoot
diff --git a/tapiriik/services/GoogleDrive/__init__.py b/tapiriik/services/GoogleDrive/__init__.py
new file mode 100644
index 000000000..414816609
--- /dev/null
+++ b/tapiriik/services/GoogleDrive/__init__.py
@@ -0,0 +1 @@
+from .googledrive import *
diff --git a/tapiriik/services/GoogleDrive/googledrive.py b/tapiriik/services/GoogleDrive/googledrive.py
new file mode 100644
index 000000000..209f57054
--- /dev/null
+++ b/tapiriik/services/GoogleDrive/googledrive.py
@@ -0,0 +1,340 @@
+from tapiriik.settings import WEB_ROOT, GOOGLEDRIVE_CLIENT_ID, GOOGLEDRIVE_CLIENT_SECRET
+from tapiriik.services.service_base import ServiceAuthenticationType
+from tapiriik.services.storage_service_base import StorageServiceBase
+from tapiriik.services.service_record import ServiceRecord
+from tapiriik.services.api import APIException, UserException, UserExceptionType, APIExcludeActivity, ServiceException
+from tapiriik.database import cachedb, redis
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaInMemoryUpload
+from googleapiclient import errors
+from oauth2client.client import OAuth2WebServerFlow, OAuth2Credentials
+from django.core.urlresolvers import reverse
+import logging
+import httplib2
+import requests
+import json
+
+logger = logging.getLogger(__name__)
+
+GOOGLE_REVOKE_URI = 'https://accounts.google.com/o/oauth2/revoke'
+
+# Full scope needed so that we can read files that the user adds by hand
+_OAUTH_SCOPE = "https://www.googleapis.com/auth/drive"
+
+# Mimetypes to use when uploading, keyed by extension
+_MIMETYPES = {
+    "gpx": "application/gpx+xml",
+    "tcx": "application/vnd.garmin.tcx+xml"
+}
+
+# Mimetype given to folders on google drive.
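+# _folderRecurse below checks each entry against this mimetype to decide whether
+# to recurse into it as a folder or yield it as a candidate activity file.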
+_FOLDER_MIMETYPE = "application/vnd.google-apps.folder"
+
+def _basename(path):
+    return path.split("/")[-1]
+
+class GoogleDriveService(StorageServiceBase):
+    ID = "googledrive"
+    DisplayName = "Google Drive"
+    DisplayAbbreviation = "GD"
+    AuthenticationType = ServiceAuthenticationType.OAuth
+    Configurable = True
+    ReceivesStationaryActivities = False
+    AuthenticationNoFrame = True
+    ConfigurationDefaults = {"SyncRoot": "/", "UploadUntagged": False, "Format": "tcx", "Filename": "%Y-%m-%d_#NAME_#TYPE"}
+
+    def _oauthFlow(self):
+        return_url = WEB_ROOT + reverse("oauth_return", kwargs={"service": self.ID})
+        flow = OAuth2WebServerFlow(GOOGLEDRIVE_CLIENT_ID, GOOGLEDRIVE_CLIENT_SECRET, _OAUTH_SCOPE,
+                                   redirect_uri=return_url, access_type='offline')
+        return flow
+
+    def GetClient(self, serviceRec):
+        credentials = OAuth2Credentials.from_json(serviceRec.Authorization["Credentials"])
+        http = httplib2.Http()
+        if credentials.access_token_expired:
+            logger.debug("Refreshing Google Drive credentials")
+            credentials.refresh(http)
+            serviceRec.Authorization["Credentials"] = credentials.to_json()
+            # Note: refreshed token doesn't get persisted, but will stick
+            # around in the serviceRec for the duration of a sync.
+            # TODO: Should use a SessionCache - tokens last 60 mins by default
+        http = credentials.authorize(http)
+        drive_service = build("drive", "v2", http=http)
+        return drive_service
+
+    def WebInit(self):
+        self.UserAuthorizationURL = WEB_ROOT + reverse("oauth_redirect", kwargs={"service": self.ID})
+
+    def GenerateUserAuthorizationURL(self, session, level=None):
+        flow = self._oauthFlow()
+        return flow.step1_get_authorize_url()
+
+    def _getUserId(self, svcRec):
+        client = self.GetClient(svcRec)
+        try:
+            about = client.about().get().execute()
+            # TODO: Is this a good user ID to use? Could also use email..
+            return about["rootFolderId"]
+        except errors.HttpError as error:
+            raise APIException("Google drive error fetching user ID - %s" % error)
+
+    def RetrieveAuthorizationToken(self, req, level):
+        flow = self._oauthFlow()
+        code = req.GET["code"]
+        credentials = flow.step2_exchange(code)
+        cred_json = credentials.to_json()
+
+        uid = self._getUserId(ServiceRecord({"Authorization": {"Credentials": cred_json}}))
+        return (uid, {"Credentials": cred_json})
+
+    def RevokeAuthorization(self, serviceRec):
+        credentials = OAuth2Credentials.from_json(serviceRec.Authorization["Credentials"])
+        # should this just be calling credentials.revoke()?
+        resp = requests.post(GOOGLE_REVOKE_URI, data={"token": credentials.access_token})
+        if resp.status_code == 400:
+            try:
+                result = json.loads(resp.text)
+            except ValueError:
+                result = {}
+            if result.get("error") == "invalid_token":
+                logger.debug("Google drive said token %s invalid when we tried to revoke it, oh well.." % credentials.access_token)
+                # Token wasn't valid anyway, we're good
+                return
+            raise APIException("Error revoking Google Drive auth token, status " + str(resp.status_code) + " resp " + resp.text)
+        elif resp.status_code != 200:
+            raise APIException("Unable to revoke Google Drive auth token, status " + str(resp.status_code) + " resp " + resp.text)
+
+    def _idCache(self, cache):
+        if "FileIDs" not in cache:
+            cache["FileIDs"] = []
+        return cache["FileIDs"]
+
+    def _getFileId(self, client, path, cache):
+        """ Get the file id for the given path. Returns (None, None) if the path does not exist.
+        Also returns the cache hits used in determining the id, in case they turn out to be wrong.
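+        The id cache is a list of {"ID": ..., "Parent": ..., "Name": ...} records (see _idCache);
+        lookup walks the path one component at a time, starting from the "root" alias.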
+        """
+        id_cache = self._idCache(cache)
+
+        if path == "":
+            path = "/"
+
+        assert(path.startswith("/"))
+        if path.endswith("/"):
+            path = path[:-1]
+        currentid = "root"
+        parts = path.split("/")
+        offset = 1
+        cachehits = set()
+
+        while offset < len(parts):
+            existingRecord = [x for x in id_cache if (x["Parent"] == currentid and x["Name"] == parts[offset])]
+            if len(existingRecord):
+                existingRecord = existingRecord[0]
+                currentid = existingRecord["ID"]
+                cachehits.add(currentid)
+            else:
+                try:
+                    params = {"q": "title = '%s'" % parts[offset], "fields": "items/id"}
+                    children = client.children().list(folderId=currentid, **params).execute()
+                except errors.HttpError as error:
+                    raise APIException("Error listing Google Drive contents - %s" % str(error))
+
+                if not len(children.get("items", [])):
+                    if cachehits:
+                        # The cache may have led us astray - clear hits and try again
+                        self._removeCachedIds(cachehits, cache)
+                        return self._getFileId(client, path, cache)
+                    else:
+                        return None, None
+                childid = children["items"][0]["id"]
+                id_cache.append({"ID": childid, "Parent": currentid, "Name": parts[offset]})
+                currentid = childid
+            offset += 1
+        return currentid, cachehits
+
+    def _removeCachedIds(self, fileids, cache):
+        id_cache = self._idCache(cache)
+        id_cache[:] = (x for x in id_cache if x["ID"] not in fileids)
+
+    def _getFile(self, client, path, storageid, cache):
+        logger.info("getfile %s %s" % (storageid, path))
+        if storageid:
+            file_id = storageid
+            cachehits = None
+        else:
+            file_id, cachehits = self._getFileId(client, path, cache)
+            logger.info("Trying to fetch id %s from path %s" % (file_id, path))
+        if not file_id:
+            return None # File not found.
+
+        try:
+            file = client.files().get(fileId=file_id).execute()
+        except errors.HttpError as error:
+            if error.resp.status == 404 and cachehits:
+                logger.debug("Google drive cache %s invalid - 404" % file_id)
+                # remove cache entries and try again
+                self._removeCachedIds(cachehits, cache)
+                return self._getFile(client, path, storageid, cache)
+            raise APIException("Error %d fetching Google Drive file URL - %s" % (error.resp.status, str(error)))
+
+        if file.get("title") != _basename(path):
+            if not cachehits:
+                # shouldn't happen?
+                raise APIException("Error fetching Google Drive file - name didn't match")
+
+            # Cached file ID now has different name - invalidate and try again
+            logger.debug("Google drive cache %s invalid - name no longer matches" % file_id)
+            self._removeCachedIds(cachehits, cache)
+            return self._getFile(client, path, storageid, cache)
+
+        return file
+
+    def GetFileContents(self, svcRec, client, path, storageid, cache):
+        """ Return a tuple of (contents, version_number) for a given path. """
+        import hashlib
+
+        file = self._getFile(client, path, storageid, cache)
+        if file is None or file.get("downloadUrl") is None:
+            # File not found or has no contents
+            return None, 0
+
+        resp, content = client._http.request(file.get("downloadUrl"))
+        if resp.status != 200:
+            raise APIException("Google drive download error - status %d" % resp.status)
+
+        md5sum = file.get("md5Checksum")
+        if md5sum:
+            csp = hashlib.new("md5")
+            csp.update(content)
+            contentmd5 = csp.hexdigest()
+            if contentmd5.lower() != md5sum.lower():
+                raise APIException("Google drive download error - md5 mismatch %s vs %s" % (md5sum, contentmd5))
+        return content, file["version"]
+
+    def PutFileContents(self, svcRec, client, path, contents, cache):
+        """ Write the contents to the file and return a version number for the newly written file.
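+        Creates the immediate parent folder if it does not already exist (one level only).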
""" + fname = _basename(path) + parent = path[:-(len(fname)+1)] + logger.debug("Google Drive putting file contents for %s %s" % (parent, fname)) + parent_id, cachehits = self._getFileId(client, parent, cache) + + if parent_id is None: + # First make a directory. Only make one level up. + dirname = _basename(parent) + top_parent = parent[:-(len(dirname)+1)] + logger.debug("Google Drive creating parent - '%s' '%s'" % (top_parent, dirname)) + top_parent_id, topcachehits = self._getFileId(client, top_parent, cache) + if top_parent_id is None: + raise APIException("Parent of directory for %s does not exist, giving up" % (path,)) + + body = {"title": dirname, "mimeType": _FOLDER_MIMETYPE, "parents": [{"id": top_parent_id}]} + + try: + parent_obj = client.files().insert(body=body).execute() + except errors.HttpError as error: + if error.resp.status == 404 and topcachehits: + logger.debug("Google drive cache %s invalid - 404" % top_parent_id) + self._removeCachedIds(topcachehits.union(cachehits), cache) # remove cache entries and try again + return self.PutFileContents(svcRec, client, path, contents, cache) + raise APIException("Google drive error creating folder - %s" % error) + + parent_id = parent_obj["id"] + + extn = fname.split(".")[-1].lower() + if extn not in _MIMETYPES: + # Shouldn't happen? + raise APIException("Google drive upload only supports file types %s" % (_MIMETYPES.keys(),)) + + media_body = MediaInMemoryUpload(contents, mimetype=_MIMETYPES[extn], resumable=True) + # TODO: Maybe description should ideally be Activity.Notes? + body = {"title": fname, "description": "Uploaded by Tapiriik", "mimeType": _MIMETYPES[extn], "parents": [{"id": parent_id}]} + + try: + file = client.files().insert(body=body, media_body=media_body).execute() + return file["version"] + except errors.HttpError as error: + if error.resp.status == 404 and cachehits: + logger.debug("Google drive cache %s invalid - 404" % parent_id) + self._removeCachedIds(cachehits, cache) # remove cache entries and try again + return self.PutFileContents(svcRec, client, path, contents, cache) + raise APIException("Google drive upload error - %s" % error) + + def MoveFile(self, svcRec, client, path, destPath, cache): + """ Move/rename the file "path" to "destPath". """ + fname1 = _basename(path) + fname2 = _basename(destPath) + if path[:-len(fname1)] != destPath[:-len(fname2)]: + # Currently only support renaming files in the same dir, otherwise + # we have to twiddle parents which is hard.. + raise NotImplementedError() + + try: + file = self._getFile(client, path, cache) + if file is None: + raise APIException("Error renaming file: %s not found" % path) + file["title"] = fname1 + client.files().update(fileId=file["id"], body=file, newRevision=False).execute() + except errors.HttpError as error: + raise APIException("Error renaming file: %s" % error) + + def ServiceCacheDB(self): + return cachedb.googledrive_cache + + def SyncRoot(self, svcRec): + # TODO: Make this configurable + return "/tapiriik" + + def EnumerateFiles(self, svcRec, client, root, cache): + root_id, cachehits = self._getFileId(client, root, cache) + if root_id is None: + # Root does not exist.. that's ok, just no files to list. 
+            return
+
+        idcache = self._idCache(cache)
+        yield from self._folderRecurse(svcRec, client, root_id, root, idcache)
+
+    def _folderRecurse(self, svcRec, client, parent_id, parent_path, id_cache):
+        assert(not parent_path.endswith("/"))
+        page_token = None
+        while True:
+            try:
+                param = {"maxResults": 1000, "q": "trashed = false and '%s' in parents" % parent_id, "fields": "items(id,version,parents(id,isRoot,kind),title,md5Checksum,mimeType),kind,nextLink,nextPageToken"}
+                if page_token:
+                    param["pageToken"] = page_token
+                children = client.files().list(**param).execute()
+
+                for child in children.get("items", []):
+                    ctitle = child["title"]
+                    cid = child["id"]
+                    cpath = parent_path + "/" + ctitle
+                    is_folder = child.get("mimeType") == _FOLDER_MIMETYPE
+                    is_supported_file = any([ctitle.lower().endswith("."+x) for x in _MIMETYPES.keys()])
+
+                    if not is_folder and not is_supported_file:
+                        continue
+
+                    cache_entry = {"ID": cid, "Parent": parent_id, "Name": ctitle}
+                    if cache_entry not in id_cache:
+                        if any([x["ID"] == cid for x in id_cache]):
+                            # Cached different name or parent info for this ID, maybe moved
+                            logger.debug("ID %s seems to have changed name, updating cache" % cid)
+                            id_cache[:] = (x for x in id_cache if x["ID"] != cid)
+                        if any([x["Parent"] == parent_id and x["Name"] == ctitle for x in id_cache]):
+                            logger.debug("%s/%s seems to have changed id, updating cache" % (parent_id, ctitle))
+                            # Cached a different id for this parent/name - drop the stale entry
+                            id_cache[:] = (x for x in id_cache if not (x["Parent"] == parent_id and x["Name"] == ctitle))
+                        id_cache.append(cache_entry)
+
+                    if is_folder:
+                        yield from self._folderRecurse(svcRec, client, cid, cpath, id_cache)
+                    elif is_supported_file:
+                        yield (cpath, cpath.replace(parent_path, "", 1), cid, child["version"])
+
+                page_token = children.get("nextPageToken")
+                if not page_token:
+                    break
+            except errors.HttpError as error:
+                raise APIException("Error listing files in Google Drive - %s" % error)
diff --git a/tapiriik/services/__init__.py b/tapiriik/services/__init__.py
index c4bd65421..204822f61 100644
--- a/tapiriik/services/__init__.py
+++ b/tapiriik/services/__init__.py
@@ -8,6 +8,8 @@ Endomondo = EndomondoService()
 from tapiriik.services.Dropbox import DropboxService
 Dropbox = DropboxService()
+from tapiriik.services.GoogleDrive import GoogleDriveService
+GoogleDrive = GoogleDriveService()
 from tapiriik.services.GarminConnect import GarminConnectService
 GarminConnect = GarminConnectService()
 from tapiriik.services.SportTracks import SportTracksService
diff --git a/tapiriik/services/service.py b/tapiriik/services/service.py
index 93af9e8d2..3da36ceb1 100644
--- a/tapiriik/services/service.py
+++ b/tapiriik/services/service.py
@@ -34,6 +34,7 @@ def List():
         Endomondo,
         SportTracks,
         Dropbox,
+        GoogleDrive,
         TrainingPeaks,
         RideWithGPS,
         TrainAsONE,
@@ -62,6 +63,7 @@ def PreferredDownloadPriorityList():
         SportTracks, # Pretty much equivalent to GC, no temperature (not that GC temperature works all thar well now, but I digress)
         TrainingPeaks, # No seperate run cadence, but has temperature
         Dropbox, # Equivalent to any of the above
+        GoogleDrive,
         RideWithGPS, # Uses TCX for everything, so same as Dropbox
         TrainAsONE,
         VeloHero, # PWX export, no temperature
diff --git a/tapiriik/services/storage_service_base.py b/tapiriik/services/storage_service_base.py
new file mode 100644
index 000000000..44a267bea
--- /dev/null
+++ b/tapiriik/services/storage_service_base.py
@@ -0,0 +1,257 @@
+from tapiriik.services.service_base import ServiceBase
+from tapiriik.services.api import UserException, UserExceptionType, APIExcludeActivity
+from tapiriik.services.interchange import ActivityType, UploadedActivity
+from tapiriik.services.exception_tools import strip_context
+from tapiriik.services.gpx import GPXIO
+from tapiriik.services.tcx import TCXIO
+import re
+import lxml.etree
+from datetime import datetime
+import logging
+logger = logging.getLogger(__name__)
+
+class StorageServiceBase(ServiceBase):
+    """
+    A base class for all storage-like services (Dropbox, Google Drive, etc)
+    """
+
+    # Maximum path length that this service will accept. Default is from Dropbox.
+    MaxPathLen = 255
+
+    ReceivesStationaryActivities = False
+
+    ActivityTaggingTable = { # earlier items have precedence over later ones
+        ActivityType.Running: "run(?!tastic)",
+        ActivityType.MountainBiking: "m(oun)?t(ai)?n\s*bik(e|ing)",
+        ActivityType.Cycling: "(cycl(e|ing)|bik(e|ing))",
+        ActivityType.Walking: "walk",
+        ActivityType.Hiking: "hik(e|ing)",
+        ActivityType.DownhillSkiing: "(downhill|down(hill)?\s*ski(ing)?)",
+        ActivityType.CrossCountrySkiing: "(xc|cross.*country)\s*ski(ing)?",
+        ActivityType.Snowboarding: "snowboard(ing)?",
+        ActivityType.Skating: "skat(e|ing)?",
+        ActivityType.Swimming: "swim",
+        ActivityType.Wheelchair: "wheelchair",
+        ActivityType.Rowing: "row",
+        ActivityType.Elliptical: "elliptical",
+        ActivityType.RollerSkiing: "rollerskiing",
+        ActivityType.StrengthTraining: "strength( ?training)?",
+        ActivityType.Gym: "(gym|workout)",
+        ActivityType.Climbing: "climb(ing)?",
+        ActivityType.StandUpPaddling: "(sup|stand( |-)?up ?paddl(e|ing))",
+        ActivityType.Other: "(other|unknown)"
+    }
+
+    SupportsHR = SupportsCadence = True
+
+    SupportedActivities = ActivityTaggingTable.keys()
+
+    def GetClient(self, svcRec):
+        """ Return a client object for the service. Will be passed back in to the various calls below """
+        raise NotImplementedError()
+
+    def GetFileContents(self, svcRec, client, path, storageid, cache):
+        """ Return a tuple of (contents, version_number) for a given path. If this file was just enumerated,
+        storageid will be given (see EnumerateFiles below), otherwise it will be None. """
+        raise NotImplementedError()
+
+    def PutFileContents(self, svcRec, client, path, contents, cache):
+        """ Write the contents to the file and return a version number for the newly written file. """
+        raise NotImplementedError()
+
+    def MoveFile(self, svcRec, client, path, destPath, cache):
+        """ Move/rename the file "path" to "destPath". """
+        raise NotImplementedError()
+
+    def ServiceCacheDB(self):
+        """ Get the cache DB object for this service, eg, cachedb.dropbox_cache """
+        raise NotImplementedError()
+
+    def SyncRoot(self, svcRec):
+        """ Get the root directory on the service that we will be syncing to, eg, "/tapiriik/" """
+        raise NotImplementedError()
+
+    def EnumerateFiles(self, svcRec, client, root, cache):
+        """ List the files available on the remote (applying some filtering,
+        and using cache as appropriate). Should yield tuples of:
+            (fullPath, relPath, storageid, revision)
+        where storageid is some unique id that can be passed back to GetFileContents above,
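+        and revision is an opaque per-file version marker (a Dropbox rev, a Google Drive
+        version number) that DownloadActivityList compares against its cached value to
+        decide whether a file must be re-fetched.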
+ """ + raise NotImplementedError() + + def _tagActivity(self, text): + for act, pattern in self.ActivityTaggingTable.items(): + if re.search(pattern, text, re.IGNORECASE): + return act + return None + + def _getActivity(self, serviceRecord, client, path, storageid, cache, base_activity=None): + activityData, revision = self.GetFileContents(serviceRecord, client, path, storageid, cache) + + try: + if path.lower().endswith(".tcx"): + act = TCXIO.Parse(activityData, base_activity) + else: + act = GPXIO.Parse(activityData, base_activity) + except ValueError as e: + raise APIExcludeActivity("Invalid GPX/TCX " + str(e), activity_id=path, user_exception=UserException(UserExceptionType.Corrupt)) + except lxml.etree.XMLSyntaxError as e: + raise APIExcludeActivity("LXML parse error " + str(e), activity_id=path, user_exception=UserException(UserExceptionType.Corrupt)) + return act, revision + + def _getCache(self, svcRec): + cache = self.ServiceCacheDB().find_one({"ExternalID": svcRec.ExternalID}, {"ExternalID": True, "Activities": True}) + if cache is None: + cache = {"ExternalID": svcRec.ExternalID, "Activities": {}} + return cache + + def _storeCache(self, svcRec, cache): + if "_id" in cache: + self.ServiceCacheDB().save(cache) + else: + insert_result = self.ServiceCacheDB().insert(cache) + cache["_id"] = insert_result.inserted_id + + def DownloadActivityList(self, svcRec, exhaustive=False): + client = self.GetClient(svcRec) + + cache = self._getCache(svcRec) + syncRoot = self.SyncRoot(svcRec) + + activities = [] + exclusions = [] + discovered_activity_cache_keys = set() + + for (path, relPath, storageid, revision) in self.EnumerateFiles(svcRec, client, syncRoot, cache): + hashedRelPath = self._hash_path(relPath) + discovered_activity_cache_keys.add(hashedRelPath) + if hashedRelPath in cache["Activities"]: + existing = cache["Activities"][hashedRelPath] + else: + existing = None + + if existing and existing["Rev"] == revision: + # don't need entire activity loaded here, just UID + act = UploadedActivity() + act.UID = existing["UID"] + try: + act.StartTime = datetime.strptime(existing["StartTime"], "%H:%M:%S %d %m %Y %z") + except: + act.StartTime = datetime.strptime(existing["StartTime"], "%H:%M:%S %d %m %Y") # Exactly one user has managed to break %z :S + if "EndTime" in existing: # some cached activities may not have this, it is not essential + act.EndTime = datetime.strptime(existing["EndTime"], "%H:%M:%S %d %m %Y %z") + else: + logger.debug("Retrieving %s (%s)" % (path, "outdated meta cache" if existing else "not in meta cache")) + # get the full activity + try: + act, rev = self._getActivity(svcRec, client, path, storageid, cache) + except APIExcludeActivity as e: + logger.info("Encountered APIExcludeActivity %s" % str(e)) + exclusions.append(strip_context(e)) + continue + + try: + act.EnsureTZ() + except: + pass # We tried. + + act.Laps = [] # Yeah, I'll process the activity twice, but at this point CPU time is more plentiful than RAM. + cache["Activities"][hashedRelPath] = {"Rev": rev, "UID": act.UID, "StartTime": act.StartTime.strftime("%H:%M:%S %d %m %Y %z"), "EndTime": act.EndTime.strftime("%H:%M:%S %d %m %Y %z")} + # Incrementally update the cache db. + # Otherwise, if we crash later on in listing + # (due to OOM or similar), we'll never make progress on this account. 
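+                    # (_storeCache upserts: the first call inserts the cache document, later calls save over the same _id.)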
+                    self._storeCache(svcRec, cache)
+                tagRes = self._tagActivity(relPath)
+                act.ServiceData = {"Path": path, "Tagged": tagRes is not None}
+
+                act.Type = tagRes if tagRes is not None else ActivityType.Other
+
+                logger.debug("Activity s/t %s" % act.StartTime)
+
+                activities.append(act)
+
+        # Drop deleted activities' records from cache.
+        all_activity_cache_keys = set(cache["Activities"].keys())
+        for deleted_key in all_activity_cache_keys - discovered_activity_cache_keys:
+            del cache["Activities"][deleted_key]
+
+        self._storeCache(svcRec, cache)
+
+        return activities, exclusions
+
+    def DownloadActivity(self, serviceRecord, activity):
+        # activity might not be populated at this point, still possible to bail out
+        if not activity.ServiceData["Tagged"]:
+            if not (hasattr(serviceRecord, "Config") and "UploadUntagged" in serviceRecord.Config and serviceRecord.Config["UploadUntagged"]):
+                raise APIExcludeActivity("Activity untagged", permanent=False, activity_id=activity.ServiceData["Path"], user_exception=UserException(UserExceptionType.Untagged))
+
+        path = activity.ServiceData["Path"]
+        client = self.GetClient(serviceRecord)
+        cache = self._getCache(serviceRecord)
+        activity, rev = self._getActivity(serviceRecord, client, path, None, cache)
+        self._storeCache(serviceRecord, cache)
+
+        # Storage-based services don't support stationary activities yet.
+        if activity.CountTotalWaypoints() <= 1:
+            raise APIExcludeActivity("Too few waypoints", activity_id=path, user_exception=UserException(UserExceptionType.Corrupt))
+
+        return activity
+
+    def _hash_path(self, path):
+        import hashlib
+        # Can't use the raw file path as a dict key in Mongo, since who knows what'll be in it (periods especially)
+        # Used the activity UID for the longest time, but that causes inefficiency when >1 file represents the same activity
+        # So, this:
+        csp = hashlib.new("md5")
+        csp.update(path.encode("utf-8"))
+        return csp.hexdigest()
+
+    def _clean_activity_name(self, name):
+        # https://www.dropbox.com/help/145/en
+        # Nothing outside BMP is allowed, either, apparently.
+        return re.sub("[@><:\"|?*]|[^\U00000000-\U0000d7ff\U0000e000-\U0000ffff]", "", re.sub("[/\\\]", "-", name))
+
+    def _format_file_name(self, format, activity):
+        name_pattern = re.compile("#NAME", re.IGNORECASE)
+        type_pattern = re.compile("#TYPE", re.IGNORECASE)
+        name = activity.StartTime.strftime(format)
+        name = name_pattern.sub(self._clean_activity_name(activity.Name) if activity.Name and len(activity.Name) > 0 and activity.Name.lower() != activity.Type.lower() else "", name)
+        name = type_pattern.sub(activity.Type, name)
+        name = re.sub(r"([\W_])\1+", r"\1", name) # To handle cases where the activity is unnamed
+        name = re.sub(r"^([\W_])|([\W_])$", "", name) # To deal with trailing-separator weirdness (repeated separator handled by prev regexp)
+        return name
+
+    def UploadActivity(self, serviceRecord, activity):
+        format = serviceRecord.GetConfiguration()["Format"]
+        if format == "tcx":
+            if "tcx" in activity.PrerenderedFormats:
+                logger.debug("Using prerendered TCX")
+                data = activity.PrerenderedFormats["tcx"]
+            else:
+                data = TCXIO.Dump(activity)
+        else:
+            if "gpx" in activity.PrerenderedFormats:
+                logger.debug("Using prerendered GPX")
+                data = activity.PrerenderedFormats["gpx"]
+            else:
+                data = GPXIO.Dump(activity)
+
+        fname = self._format_file_name(serviceRecord.GetConfiguration()["Filename"], activity)[:self.MaxPathLen-5] + "." + format # max path length, and we have to save for the file ext (4) and the leading slash (1)
+
+        client = self.GetClient(serviceRecord)
+
+        syncRoot = self.SyncRoot(serviceRecord)
+        if not syncRoot.endswith("/"):
+            syncRoot += "/"
+        fpath = syncRoot + fname
+
+        cache = self._getCache(serviceRecord)
+        revision = self.PutFileContents(serviceRecord, client, fpath, data.encode("UTF-8"), cache)
+
+        # Fake this in so we don't immediately redownload the activity next time 'round
+        cache["Activities"][self._hash_path("/" + fname)] = {"Rev": revision, "UID": activity.UID, "StartTime": activity.StartTime.strftime("%H:%M:%S %d %m %Y %z"), "EndTime": activity.EndTime.strftime("%H:%M:%S %d %m %Y %z")}
+        self._storeCache(serviceRecord, cache)
+        return fpath
+
+    def DeleteCachedData(self, serviceRecord):
+        self.ServiceCacheDB().remove({"ExternalID": serviceRecord.ExternalID})
diff --git a/tapiriik/web/static/img/services/googledrive.png b/tapiriik/web/static/img/services/googledrive.png
new file mode 100644
index 000000000..13cdda50a
Binary files /dev/null and b/tapiriik/web/static/img/services/googledrive.png differ
diff --git a/tapiriik/web/static/img/services/googledrive_l.png b/tapiriik/web/static/img/services/googledrive_l.png
new file mode 100644
index 000000000..5cb8e8097
Binary files /dev/null and b/tapiriik/web/static/img/services/googledrive_l.png differ
diff --git a/tapiriik/web/views/privacy.py b/tapiriik/web/views/privacy.py
index 60d4532f5..383ba76fa 100644
--- a/tapiriik/web/views/privacy.py
+++ b/tapiriik/web/views/privacy.py
@@ -19,6 +19,7 @@ def privacy(request):
     services["strava"].update({"email": NO, "password": NO, "tokens": YES, "metadata": YES, "data":NO})
     services["sporttracks"].update({"email": NO, "password": NO, "tokens": YES, "metadata": YES, "data":NO})
     services["dropbox"].update({"email": NO, "password": NO, "tokens": YES, "metadata": YES, "data":NO})
+    services["googledrive"].update({"email": NO, "password": NO, "tokens": YES, "metadata": YES, "data":CACHED})
     services["runkeeper"].update({"email": NO, "password": NO, "tokens": YES, "metadata": YES, "data":NO})
     services["rwgps"].update({"email": OPTIN, "password": OPTIN, "tokens": NO, "metadata": YES, "data":NO})
     services["trainingpeaks"].update({"email": NO, "password": NO, "tokens": YES, "metadata": YES, "data":NO})
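
Taken together, storage_service_base.py reduces a new storage backend to seven hooks; everything else (activity tagging, the metadata cache, DownloadActivityList/DownloadActivity/UploadActivity) comes from the base class. A minimal sketch of what an additional backend would look like, assuming a hypothetical "examplestorage" service and client API (the Example* names below are illustrative and not part of this change):

    from tapiriik.services.storage_service_base import StorageServiceBase
    from tapiriik.database import cachedb

    class ExampleStorageService(StorageServiceBase):
        # Hypothetical service, for illustration only.
        ID = "examplestorage"
        DisplayName = "Example Storage"
        DisplayAbbreviation = "EX"

        def GetClient(self, svcRec):
            # Any object the hooks below understand; it is passed back into every call.
            return ExampleClient(svcRec.Authorization["Token"])  # hypothetical client

        def SyncRoot(self, svcRec):
            return "/tapiriik"

        def ServiceCacheDB(self):
            # One Mongo collection per service, holding the per-user activity metadata cache.
            return cachedb.examplestorage_cache

        def EnumerateFiles(self, svcRec, client, root, cache):
            # Yield (fullPath, relPath, storageid, revision) for each candidate file.
            for f in client.list(root):  # hypothetical API
                yield (f.path, f.path.replace(root, "", 1), f.id, f.version)

        def GetFileContents(self, svcRec, client, path, storageid, cache):
            f = client.get(storageid or path)  # hypothetical API
            return f.content, f.version

        def PutFileContents(self, svcRec, client, path, contents, cache):
            # contents arrives already encoded to bytes by UploadActivity.
            return client.put(path, contents).version  # hypothetical API

        def MoveFile(self, svcRec, client, path, destPath, cache):
            client.move(path, destPath)  # hypothetical API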