Skip to content

Commit

Permalink
Merge pull request #184 from Stypox/duplicate-subscription-fix
Browse files Browse the repository at this point in the history
 Fix inconsistency in youtube channel urls
  • Loading branch information
theScrabi authored Aug 17, 2019
2 parents f9da8bc + 58e3996 commit 430da57
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@

@SuppressWarnings("WeakerAccess")
public class YoutubeChannelExtractor extends ChannelExtractor {
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";

Expand All @@ -72,7 +73,7 @@ public String getNextPageUrl() throws ExtractionException {
@Override
public String getUrl() throws ParsingException {
try {
return "https://www.youtube.com/channel/" + getId();
return CHANNEL_URL_BASE + getId();
} catch (ParsingException e) {
return super.getUrl();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.Utils;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
* Created by Christian Schabesberger on 12.02.17.
*
Expand Down Expand Up @@ -53,8 +56,20 @@ public String getName() throws ParsingException {

/**
 * Captures the channel id from a button tracking URL in which the "/channel/ID" path
 * segment appears double URL-encoded ("%252Fchannel%252FID").
 * Compiled once instead of on every {@link #getUrl()} call.
 */
private static final Pattern CHANNEL_ID_PATTERN =
        Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");

/**
 * Returns the url of this channel, preferably in the canonical "/channel/ID" form.
 *
 * @return the channel url built from the id found in the button's tracking url, or the
 *         tile link's url if no id could be found
 * @throws ParsingException if neither the button nor the tile link yields a url
 */
@Override
public String getUrl() throws ParsingException {
    try {
        // eq(0) gives an empty list (and attr() an empty string) when no button matches,
        // whereas first() would return null and throw before the fallback could run
        final String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]")
                .eq(0).attr("abs:data-href");

        final Matcher match = CHANNEL_ID_PATTERN.matcher(buttonTrackingUrl);
        if (match.matches()) {
            return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
        }

        // fallback method just in case youtube changes things; it should never run and tests will fail
        // provides a url with "/user/NAME", which is inconsistent with the stream and channel extractors
        return el.select("a[class*=\"yt-uix-tile-link\"]").first()
                .attr("abs:href");
    } catch (Exception e) {
        // report missing elements the same way the other extractors do instead of leaking an NPE
        throw new ParsingException("Could not get channel url", e);
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public String getName() throws ParsingException {
try {
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
} catch (Exception e) {
throw new ParsingException("Could not get playlist name");
throw new ParsingException("Could not get playlist name", e);
}
}

Expand All @@ -59,7 +59,7 @@ public String getThumbnailUrl() throws ParsingException {
try {
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
} catch (Exception e) {
throw new ParsingException("Could not get playlist thumbnail");
throw new ParsingException("Could not get playlist thumbnail", e);
}
}

Expand All @@ -72,9 +72,11 @@ public String getBannerUrl() {
@Override
public String getUploaderUrl() throws ParsingException {
try {
return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href");
return YoutubeChannelExtractor.CHANNEL_URL_BASE +
doc.select("button[class*=\"yt-uix-subscription-button\"]")
.first().attr("data-channel-external-id");
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name");
throw new ParsingException("Could not get playlist uploader url", e);
}
}

Expand All @@ -83,7 +85,7 @@ public String getUploaderName() throws ParsingException {
try {
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name");
throw new ParsingException("Could not get playlist uploader name", e);
}
}

Expand All @@ -92,7 +94,7 @@ public String getUploaderAvatarUrl() throws ParsingException {
try {
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader avatar");
throw new ParsingException("Could not get playlist uploader avatar", e);
}
}

Expand Down Expand Up @@ -248,6 +250,8 @@ public String getUploaderName() throws ParsingException {

/**
 * Returns the absolute url that the uploader link points to.
 * The url is passed through as found: it is not always in the form "/channel/...",
 * sometimes YouTube provides urls in the form "/user/...".
 */
@Override
public String getUploaderUrl() throws ParsingException {
    final String uploaderHref = getUploaderLink().attr("abs:href");
    return uploaderHref;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ public String getUploaderName() throws ParsingException {

@Override
public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes YouTube provides urls in the form "/user/..."
try {
try {
return item.select("div[class=\"yt-lockup-byline\"]").first()
Expand All @@ -119,7 +121,7 @@ public String getUploaderUrl() throws ParsingException {
.text().split(" - ")[0];
} catch (Exception e) {
System.out.println(item.html());
throw new ParsingException("Could not get uploader", e);
throw new ParsingException("Could not get uploader url", e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ public String getUploaderUrl() throws ParsingException {
}

/**
 * Finds the first anchor inside the first byline element of this item.
 * The url of that anchor is not always in the form "/channel/...";
 * sometimes YouTube provides urls in the form "/user/...".
 */
private Element getUploaderLink() {
    return el.select("div[class*=\"yt-lockup-byline \"]").first()
            .select("a").first();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.soundcloud;

import org.hamcrest.CoreMatchers;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
Expand Down Expand Up @@ -119,14 +120,14 @@ public void testStreamCount() {
}
}

public static class RandomHouseDanceMusic implements BasePlaylistExtractorTest {
public static class RandomHouseMusic implements BasePlaylistExtractorTest {
private static SoundcloudPlaylistExtractor extractor;

@BeforeClass
public static void setUp() throws Exception {
NewPipe.init(Downloader.getInstance(), new Localization("GB", "en"));
extractor = (SoundcloudPlaylistExtractor) SoundCloud
.getPlaylistExtractor("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2");
.getPlaylistExtractor("https://soundcloud.com/micky96/sets/house");
extractor.fetchPage();
}

Expand All @@ -141,22 +142,22 @@ public void testServiceId() {

@Test
public void testName() {
assertEquals("House, Electro , Dance Music 2", extractor.getName());
assertEquals("House", extractor.getName());
}

@Test
public void testId() {
assertEquals("310980722", extractor.getId());
assertEquals("123062856", extractor.getId());
}

@Test
public void testUrl() throws Exception {
assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getUrl());
assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getUrl());
}

@Test
public void testOriginalUrl() throws Exception {
assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getOriginalUrl());
assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getOriginalUrl());
}

/*//////////////////////////////////////////////////////////////////////////
Expand All @@ -182,7 +183,7 @@ public void testThumbnailUrl() {
assertIsSecureUrl(extractor.getThumbnailUrl());
}

@Ignore
@Ignore("not implemented")
@Test
public void testBannerUrl() {
assertIsSecureUrl(extractor.getBannerUrl());
Expand All @@ -192,12 +193,12 @@ public void testBannerUrl() {
public void testUploaderUrl() {
final String uploaderUrl = extractor.getUploaderUrl();
assertIsSecureUrl(uploaderUrl);
assertTrue(uploaderUrl, uploaderUrl.contains("hunter-leader"));
assertThat(uploaderUrl, CoreMatchers.containsString("micky96"));
}

@Test
public void testUploaderName() {
assertEquals("Gosu", extractor.getUploaderName());
assertEquals("_mickyyy", extractor.getUploaderName());
}

@Test
Expand Down Expand Up @@ -266,6 +267,7 @@ public void testOriginalUrl() throws Exception {
// ListExtractor
//////////////////////////////////////////////////////////////////////////*/

@Ignore
@Test
public void testRelatedItems() throws Exception {
defaultTestRelatedItems(extractor, SoundCloud.getServiceId());
Expand All @@ -287,6 +289,7 @@ public void testMoreRelatedItems() throws Exception {
// PlaylistExtractor
//////////////////////////////////////////////////////////////////////////*/

@Ignore
@Test
public void testThumbnailUrl() {
assertIsSecureUrl(extractor.getThumbnailUrl());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public void testBannerUrl() throws Exception {

@Test
public void testUploaderUrl() throws Exception {
assertTrue(extractor.getUploaderUrl().contains("youtube.com"));
assertEquals("https://www.youtube.com/channel/UCs72iRpTEuwV3y6pdWYLgiw", extractor.getUploaderUrl());
}

@Test
Expand Down Expand Up @@ -185,8 +185,8 @@ public void testRelatedItems() throws Exception {
public void testMoreRelatedItems() throws Exception {
ListExtractor.InfoItemsPage<StreamInfoItem> currentPage
= defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId());
// Test for 2 more levels

// test for 2 more levels
for (int i = 0; i < 2; i++) {
currentPage = extractor.getPage(currentPage.getNextPageUrl());
defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors());
Expand Down Expand Up @@ -214,7 +214,7 @@ public void testBannerUrl() throws Exception {

@Test
public void testUploaderUrl() throws Exception {
assertTrue(extractor.getUploaderUrl().contains("youtube.com"));
assertEquals("https://www.youtube.com/channel/UCHSPWoY1J5fbDVbcnyeqwdw", extractor.getUploaderUrl());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public void testGetDescription() throws ParsingException {
}

@Test
public void testGetFullLinksInDescriptlion() throws ParsingException {
public void testGetFullLinksInDescription() throws ParsingException {
assertTrue(extractor.getDescription().contains("http://adele.com"));
assertFalse(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQi..."));
}
Expand Down Expand Up @@ -111,7 +111,7 @@ public void testGetUploadDate() throws ParsingException {

@Test
public void testGetUploaderUrl() throws ParsingException {
assertTrue(extractor.getUploaderUrl().length() > 0);
assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.youtube.search;

import org.hamcrest.CoreMatchers;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
Expand Down Expand Up @@ -63,4 +64,19 @@ public void testOnlyContainChannels() {
}
}
}

/**
 * Every channel in the result page must have a url in the "/channel/ID" form;
 * the channel with more than 5e7 subscribers must be exactly the real PewDiePie.
 */
@Test
public void testChannelUrl() {
    for (final InfoItem item : itemsPage.getItems()) {
        if (!(item instanceof ChannelInfoItem)) {
            continue;
        }

        final boolean isRealPewDiePie = ((ChannelInfoItem) item).getSubscriberCount() > 5e7;
        if (isRealPewDiePie) {
            assertEquals("https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", item.getUrl());
        } else {
            assertThat(item.getUrl(), CoreMatchers.startsWith("https://www.youtube.com/channel/"));
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public void testResultList_FirstElement() {
assertTrue((firstInfoItem instanceof ChannelInfoItem)
|| (secondInfoItem instanceof ChannelInfoItem));
assertEquals("name", "PewDiePie", channelItem.getName());
assertEquals("url","https://www.youtube.com/user/PewDiePie", channelItem.getUrl());
assertEquals("url", "https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", channelItem.getUrl());
}

@Test
Expand Down

0 comments on commit 430da57

Please sign in to comment.