forked from MaxSSD/openai-telegram
-
Notifications
You must be signed in to change notification settings - Fork 0
/
text_utils.py
37 lines (28 loc) · 1.36 KB
/
text_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from typing import Dict, Set
from collections import defaultdict
def is_markdown(text: str):
    """Report whether *text* both begins and ends with an asterisk.

    A lone ``"*"`` satisfies both conditions and therefore counts; the empty
    string does not.
    """
    opens_with_star = text[:1] == "*"
    closes_with_star = text[-1:] == "*"
    return opens_with_star and closes_with_star
def entities_extract(message_text: str, entities) -> Dict[str, Set[str]]:
    """Group the substrings covered by message entities by entity type.

    Each entity is a mapping with ``"type"``, ``"offset"`` and ``"length"``
    keys. ``offset`` and ``length`` are interpreted as UTF-16 code units,
    which is how the Telegram Bot API reports them, so text containing
    characters outside the Basic Multilingual Plane (most emoji) is sliced
    at the right place.

    Args:
        message_text: The full message text the entities refer to.
        entities: Iterable of entity mappings. ``None`` or an empty iterable
            produces an empty result.

    Returns:
        A ``defaultdict(set)`` mapping each entity type to the set of
        distinct substrings of that type found in ``message_text``.

    Raises:
        KeyError: If an entity lacks ``"type"``, ``"offset"`` or ``"length"``.
    """
    grouped = defaultdict(set)
    if not entities:
        return grouped

    # Python indexes ``str`` by code point, while the entity offsets count
    # UTF-16 code units. Slicing the UTF-16-LE bytes (2 bytes per unit) keeps
    # the two in agreement. ``surrogatepass`` avoids raising on lone
    # surrogates, so out-of-step offsets degrade gracefully instead of
    # throwing, like plain string slicing did.
    utf16 = message_text.encode("utf-16-le", errors="surrogatepass")
    for entity in entities:
        start = entity["offset"] * 2
        end = start + entity["length"] * 2
        fragment = utf16[start:end].decode("utf-16-le", errors="surrogatepass")
        grouped[entity["type"]].add(fragment)
    return grouped
class ReaderResult:
    """Container for the fields of one reader extraction result.

    All constructor arguments are stored unchanged under attributes of the
    same name, except ``text_content``, which is stored with leading and
    trailing whitespace removed.
    """

    def __init__(
        self,
        kind: str,
        text_content: str,
        title: str,
        byline: str,
        length: int,
        excerpt: str,
        site_name: str,
        language: str,
    ):
        self.kind = kind
        # Only the body text is normalised; every other field is kept as given.
        trimmed_body = text_content.strip()
        self.text_content = trimmed_body
        self.title = title
        self.byline = byline
        self.length = length
        self.excerpt = excerpt
        self.site_name = site_name
        self.language = language
def fetch_url(url: str, timeout: float = 30.0) -> ReaderResult:
    """Send *url* to the reader extraction service and wrap its JSON reply.

    Args:
        url: The address to extract. It is sent as the ``url`` form field.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        A ``ReaderResult`` built from the ``kind``, ``textContent``,
        ``title``, ``byline``, ``length``, ``excerpt``, ``siteName`` and
        ``language`` fields of the JSON response.

    Raises:
        Exception: If the service answers with a status code other than 200.
        KeyError: If the JSON response lacks one of the fields listed above.
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    # Local import: ``requests`` is only needed by this function.
    import requests

    response = requests.post(
        "https://reader-mauve-three.vercel.app/api/extract",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        # Passing a dict lets requests percent-encode the value. A
        # hand-built "url=" + url body breaks as soon as the target URL
        # contains "&", "+", "#" or "%".
        data={"url": url},
        # Without a timeout a stalled service would block the caller forever.
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception("Error fetching url: " + str(response.status_code))

    data = response.json()
    return ReaderResult(
        data["kind"],
        data["textContent"],
        data["title"],
        data["byline"],
        data["length"],
        data["excerpt"],
        data["siteName"],
        data["language"],
    )