diff --git a/tests/test_web.py b/tests/test_web.py index 2d2e161..646e5e8 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -1,8 +1,7 @@ import pytest +from bs4 import BeautifulSoup from httpx import BasicAuth -from yourss.youtube.utils import bs_parse - @pytest.mark.anyio async def test_default(client): @@ -58,7 +57,7 @@ async def test_page_content(client): resp = await client.get("/" + ",".join(names)) assert resp.status_code == 200 - soup = bs_parse(resp.text) + soup = BeautifulSoup(resp.text, features="html.parser") assert len(soup.find_all("div", class_="yourss-filterable")) == 45 # when no valid feed given, should return 404 diff --git a/tests/test_youtube.py b/tests/test_youtube.py index 7c3f0e4..18522a3 100644 --- a/tests/test_youtube.py +++ b/tests/test_youtube.py @@ -1,20 +1,15 @@ from http.cookiejar import CookieJar import pytest +from bs4 import BeautifulSoup from httpx import AsyncClient -from yourss.youtube import ( - YoutubeMetadata, - YoutubeRssApi, - YoutubeWebApi, -) -from yourss.youtube.scrapper import VideoScrapper -from yourss.youtube.utils import bs_parse +from yourss.youtube import PageScrapper, VideoScrapper, YoutubeApi @pytest.mark.asyncio(loop_scope="module") async def test_rgpd(): - api = YoutubeWebApi() + api = YoutubeApi() url = "/@jonnygiger" @@ -22,7 +17,7 @@ async def test_rgpd(): assert resp.status_code == 200 assert ( len( - bs_parse(resp.text).find_all( + BeautifulSoup(resp.text, features="html.parser").find_all( "form", attrs={"method": "POST", "action": "https://consent.youtube.com/save"}, ) @@ -34,7 +29,7 @@ async def test_rgpd(): assert resp.status_code == 200 assert ( len( - bs_parse(resp.text).find_all( + BeautifulSoup(resp.text, features="html.parser").find_all( "form", attrs={"method": "POST", "action": "https://consent.youtube.com/save"}, ) @@ -45,7 +40,7 @@ async def test_rgpd(): @pytest.mark.asyncio(loop_scope="module") async def test_rss_channel(): - api = YoutubeRssApi() + api = YoutubeApi() feed = await api.get_channel_rss("UCVooVnzQxPSTXTMzSi1s6uw") assert feed.title == "Jonny Giger" @@ -53,7 +48,7 @@ async def test_rss_channel(): @pytest.mark.asyncio(loop_scope="module") async def test_rss_playlist(): - api = YoutubeRssApi() + api = YoutubeApi() feed = await api.get_playlist_rss("PLw-vK1_d04zZCal3yMX_T23h5nDJ2toTk") assert feed.title == "IMPOSSIBLE TRICKS OF RODNEY MULLEN" @@ -61,35 +56,33 @@ async def test_rss_playlist(): @pytest.mark.asyncio(loop_scope="module") async def test_metadata_channel(): - api = YoutubeWebApi(AsyncClient(cookies=CookieJar())) + api = YoutubeApi(AsyncClient(cookies=CookieJar())) resp = await api.get_homepage("UCVooVnzQxPSTXTMzSi1s6uw") - meta = YoutubeMetadata.from_response(resp) + page = PageScrapper.from_response(resp) + meta = page.get_metadata() assert meta.title == "Jonny Giger" assert meta.channel_id == "UCVooVnzQxPSTXTMzSi1s6uw" - assert ( - meta.url.geturl() == "https://www.youtube.com/channel/UCVooVnzQxPSTXTMzSi1s6uw" - ) + assert meta.url == "https://www.youtube.com/channel/UCVooVnzQxPSTXTMzSi1s6uw" assert meta.avatar_url is not None @pytest.mark.asyncio(loop_scope="module") async def test_metadata_user(): - api = YoutubeWebApi(AsyncClient(cookies=CookieJar())) + api = YoutubeApi(AsyncClient(cookies=CookieJar())) resp = await api.get_homepage("@jonnygiger") - meta = YoutubeMetadata.from_response(resp) + page = PageScrapper.from_response(resp) + meta = page.get_metadata() assert meta.title == "Jonny Giger" assert meta.channel_id == "UCVooVnzQxPSTXTMzSi1s6uw" - assert ( - meta.url.geturl() == "https://www.youtube.com/channel/UCVooVnzQxPSTXTMzSi1s6uw" - ) + assert meta.url == "https://www.youtube.com/channel/UCVooVnzQxPSTXTMzSi1s6uw" assert meta.avatar_url is not None @pytest.mark.asyncio(loop_scope="module") async def test_scrap_videos(): - scrapper = VideoScrapper(YoutubeWebApi()) + scrapper = VideoScrapper() page_iterator = scrapper.iter_videos("UCVooVnzQxPSTXTMzSi1s6uw") page1 = await anext(page_iterator) diff --git a/yourss/async_utils.py b/yourss/async_utils.py index 84f8a55..c915e37 100644 --- a/yourss/async_utils.py +++ b/yourss/async_utils.py @@ -3,45 +3,40 @@ from .youtube import ( Feed, - YoutubeMetadata, - YoutubeRssApi, - YoutubeWebApi, + YoutubeApi, is_channel_id, is_playlist_id, is_user, ) +from .youtube.scrapper import PageScrapper -async def _fetch_feed( - name: str, *, rss_api: YoutubeRssApi, web_api: YoutubeWebApi -) -> Feed: +async def _fetch_feed(name: str, *, api: YoutubeApi) -> Feed: if is_playlist_id(name): - return await rss_api.get_playlist_rss(name) + return await api.get_playlist_rss(name) # if given id is a name, get the channel id if is_user(name): - meta = YoutubeMetadata.from_response(await web_api.get_homepage(name)) + page = PageScrapper.from_response(await api.get_homepage(name)) + meta = page.get_metadata() name = meta.channel_id # check valid channel id if not is_channel_id(name): raise ValueError(f"Invalid channel id: {name}") - return await rss_api.get_channel_rss(name) + return await api.get_channel_rss(name) async def afetch_feeds( - names: List[str], *, rss_api: YoutubeRssApi, web_api: YoutubeWebApi + names: List[str], *, api: YoutubeApi ) -> Dict[str, Feed | BaseException]: return { name: task for name, task in zip( names, await asyncio.gather( - *[ - _fetch_feed(name, rss_api=rss_api, web_api=web_api) - for name in names - ], + *[_fetch_feed(name, api=api) for name in names], return_exceptions=True, ), ) diff --git a/yourss/jsonutils.py b/yourss/jsonutils.py deleted file mode 100644 index da5b0f9..0000000 --- a/yourss/jsonutils.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Dict, Iterator, Type, TypeVar - -from jsonpath_ng import parse - -T = TypeVar("T") - - -def json_iter(path: str, payload: Dict, cls: Type[T] | None = None) -> Iterator[T]: - for match in parse(path).find(payload): - out = match.value - if out is not None and (cls is None or isinstance(out, cls)): - yield out - - -def json_first(path: str, payload: Dict, cls: Type[T] | None = None) -> T: - return next(json_iter(path, payload, cls=cls)) diff --git a/yourss/routers/proxy.py b/yourss/routers/proxy.py index a8165aa..8458821 100644 --- a/yourss/routers/proxy.py +++ b/yourss/routers/proxy.py @@ -1,36 +1,30 @@ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, HTTPException from fastapi.responses import RedirectResponse -from httpx import AsyncClient from pydantic import PositiveInt from starlette.status import HTTP_404_NOT_FOUND from ..youtube import ( - YoutubeMetadata, - YoutubeRssApi, - YoutubeWebApi, + PageScrapper, + YoutubeApi, is_channel_id, is_playlist_id, is_user, ) from .schema import ChannelId, Playlist_Id, UserId -from .utils import force_https, get_youtube_web_client +from .utils import force_https router = APIRouter() @router.get("/rss/{name}", response_class=RedirectResponse) -async def rss_feed( - name: UserId | ChannelId | Playlist_Id, - yt_client: AsyncClient = Depends(get_youtube_web_client), -): - api = YoutubeRssApi() - webapi = YoutubeWebApi(yt_client) +async def rss_feed(name: UserId | ChannelId | Playlist_Id): + api = YoutubeApi() feed = None # if a user is provided, get the channel id if is_user(name): - homepage = await webapi.get_homepage(name) - meta = YoutubeMetadata.from_response(homepage) + homepage = PageScrapper.from_response(await api.get_homepage(name)) + meta = homepage.get_metadata() name = meta.channel_id if is_channel_id(name): @@ -46,13 +40,11 @@ async def rss_feed( @router.get("/avatar/{name}", response_class=RedirectResponse) -async def avatar( - name: UserId | ChannelId, yt_client: AsyncClient = Depends(get_youtube_web_client) -): - webapi = YoutubeWebApi(yt_client) +async def avatar(name: UserId | ChannelId): + api = YoutubeApi() - homepage = await webapi.get_homepage(name) - meta = YoutubeMetadata.from_response(homepage) + homepage = PageScrapper.from_response(await api.get_homepage(name)) + meta = homepage.get_metadata() if (url := meta.avatar_url) is None: raise HTTPException( @@ -62,15 +54,13 @@ async def avatar( @router.get("/home/{name}", response_class=RedirectResponse) -async def home( - name: UserId | ChannelId, yt_client: AsyncClient = Depends(get_youtube_web_client) -): - webapi = YoutubeWebApi(yt_client) +async def home(name: UserId | ChannelId): + api = YoutubeApi() - homepage = await webapi.get_homepage(name) - meta = YoutubeMetadata.from_response(homepage) + homepage = PageScrapper.from_response(await api.get_homepage(name)) + meta = homepage.get_metadata() - return RedirectResponse(meta.url.geturl()) + return RedirectResponse(meta.url) @router.get("/thumbnail/{video_id}", response_class=RedirectResponse) diff --git a/yourss/routers/utils.py b/yourss/routers/utils.py index 1ab744d..581475e 100644 --- a/yourss/routers/utils.py +++ b/yourss/routers/utils.py @@ -1,15 +1,7 @@ -from http.cookiejar import CookieJar -from typing import AsyncGenerator, Callable, List +from typing import Callable, List -from httpx import AsyncClient from starlette.templating import Jinja2Templates, _TemplateResponse -cookiejar = CookieJar() - - -async def get_youtube_web_client() -> AsyncGenerator[AsyncClient, None]: - yield AsyncClient(cookies=cookiejar) - def force_https(url: str) -> str: assert isinstance(url, str) diff --git a/yourss/routers/web.py b/yourss/routers/web.py index 51df3f6..a39292c 100644 --- a/yourss/routers/web.py +++ b/yourss/routers/web.py @@ -1,10 +1,11 @@ +import json from datetime import datetime +from typing import Annotated import arrow -from fastapi import APIRouter, Depends, HTTPException, Query, Request +from fastapi import APIRouter, Cookie, Depends, HTTPException, Query, Request from fastapi.responses import RedirectResponse from fastapi.templating import Jinja2Templates -from httpx import AsyncClient from jinja2 import Environment, FileSystemLoader from starlette.responses import HTMLResponse from starlette.status import HTTP_404_NOT_FOUND @@ -15,8 +16,9 @@ from ..schema import Theme, User from ..security import get_auth_user from ..settings import current_config, templates_folder -from ..youtube import Feed, YoutubeRssApi, YoutubeWebApi -from .utils import custom_template_response, get_youtube_web_client, parse_channel_names +from ..youtube import Feed, PageScrapper, VideoScrapper, YoutubeApi +from .schema import ChannelId, UserId +from .utils import custom_template_response, parse_channel_names def clean_title(text: str) -> str: @@ -33,13 +35,21 @@ def date_humanize(date: datetime) -> str: env = Environment(loader=FileSystemLoader(templates_folder)) env.filters["clean_title"] = clean_title env.filters["date_humanize"] = date_humanize -ViewTemplateResponse = custom_template_response( +RssTemplateResponse = custom_template_response( Jinja2Templates(env=env), - "view.html", + "rss.html", version=yourss.__version__, open_primary=current_config.open_primary, open_secondary=current_config.open_secondary, ) +ChannelTemplateResponse = custom_template_response( + Jinja2Templates(env=env), "channel.html", version=yourss.__version__ +) +ChannelVideosTemplateResponse = custom_template_response( + Jinja2Templates(env=env), + "partials/channel-videos-page.html", + version=yourss.__version__, +) router = APIRouter() @@ -47,7 +57,7 @@ def date_humanize(date: datetime) -> str: @router.get("/", response_class=RedirectResponse) async def root(): return RedirectResponse( - router.url_path_for("view_channels", channels=current_config.default_channels) + router.url_path_for("rss_channels", channels=current_config.default_channels) ) @@ -59,20 +69,16 @@ async def watch(video: str = Query(alias="v", min_length=11, max_length=11)): @router.get("/u/{username}", response_class=HTMLResponse) -async def get_user( - request: Request, - yt_client: AsyncClient = Depends(get_youtube_web_client), - theme: Theme | None = None, - user: User = Depends(get_auth_user), +async def rss_user( + request: Request, theme: Theme | None = None, user: User = Depends(get_auth_user) ): - feeds = await afetch_feeds( - user.channels, rss_api=YoutubeRssApi(), web_api=YoutubeWebApi(yt_client) - ) + api = YoutubeApi() + feeds = await afetch_feeds(user.channels, api=api) active_feeds = [f for f in feeds.values() if isinstance(f, Feed)] if len(active_feeds) == 0: raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail="No channels found") - return ViewTemplateResponse( + return RssTemplateResponse( request=request, title=f"/u/{user.name}", feeds=active_feeds, @@ -81,24 +87,68 @@ async def get_user( @router.get("/{channels}", response_class=HTMLResponse) -async def view_channels( - request: Request, - channels: str, - yt_client: AsyncClient = Depends(get_youtube_web_client), - theme: Theme | None = None, -): - feeds = await afetch_feeds( - parse_channel_names(channels), - rss_api=YoutubeRssApi(), - web_api=YoutubeWebApi(yt_client), - ) +async def rss_channels(request: Request, channels: str, theme: Theme | None = None): + api = YoutubeApi() + feeds = await afetch_feeds(parse_channel_names(channels), api=api) active_feeds = [f for f in feeds.values() if isinstance(f, Feed)] if len(active_feeds) == 0: raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail="No channels found") - return ViewTemplateResponse( + return RssTemplateResponse( request=request, title=", ".join(sorted(map(lambda f: f.title, active_feeds))), feeds=active_feeds, theme=theme or current_config.theme, ) + + +@router.get("/c/{channel}", response_class=HTMLResponse) +async def videos_channel( + request: Request, channel: ChannelId | UserId, theme: Theme | None = None +): + api = YoutubeApi() + homepage = PageScrapper.from_response( + await api.get_homepage(channel, suffix="/videos") + ) + metadata = homepage.get_metadata() + assert (client_data := homepage.find_client_data()) is not None + assert (browse_data := homepage.find_browse_data()) is not None + + videos = list(browse_data.iter_videos()) + if len(videos) == 0: + raise HTTPException( + status_code=HTTP_404_NOT_FOUND, + detail=f"No video found for channel {channel}", + ) + + out = ChannelTemplateResponse( + request=request, + metadata=metadata, + videos=videos, + theme=theme or current_config.theme, + click_tracking_params=browse_data.click_tracking_params, + continuation_token=browse_data.continuation_token, + ) + out.set_cookie("client_data", json.dumps(client_data)) + return out + + +@router.get("/htmx/videos", response_class=HTMLResponse) +async def next( + request: Request, + client_data: Annotated[str, Cookie()], + click_tracking_params: Annotated[str, Query()], + continuation_token: Annotated[str, Query()], +): + scrapper = VideoScrapper() + browse_data = await scrapper.get_next_page( + json.loads(client_data), click_tracking_params, continuation_token + ) + videos = list(browse_data.iter_videos()) + out = ChannelVideosTemplateResponse( + request=request, + videos=videos, + click_tracking_params=browse_data.click_tracking_params, + continuation_token=browse_data.continuation_token, + ) + return out diff --git a/yourss/templates/channel.html b/yourss/templates/channel.html new file mode 100644 index 0000000..cacc8a6 --- /dev/null +++ b/yourss/templates/channel.html @@ -0,0 +1,44 @@ + + + +
+