-
Notifications
You must be signed in to change notification settings - Fork 631
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Ripper for nsfwalbum.com (#1691)
I've gone ahead and added a ripper for nsfwalbum.com. The site hosts its images on many different image-hosting services, so part of each image URL must be replaced to get the full-size image instead of the thumbnail. There may be more hosts that I did not find; they can easily be added later.
- Loading branch information
Showing
1 changed file
with
108 additions
and
0 deletions.
There are no files selected for viewing
108 changes: 108 additions & 0 deletions
108
src/main/java/com/rarchives/ripme/ripper/rippers/NsfwAlbumRipper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
package com.rarchives.ripme.ripper.rippers; | ||
|
||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.util.regex.Pattern; | ||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
|
||
import org.jsoup.*; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
|
||
import com.rarchives.ripme.ripper.AbstractHTMLRipper; | ||
import com.rarchives.ripme.utils.Http; | ||
|
||
//https://github.com/ripmeapp/ripme/wiki/How-To-Create-A-Ripper-for-HTML-websites | ||
public class NsfwAlbumRipper extends AbstractHTMLRipper | ||
{ | ||
private static final String HOST = "nsfwalbum"; | ||
private static final String DOMAIN = "nsfwalbum.com"; | ||
|
||
public NsfwAlbumRipper(URL url) throws IOException | ||
{ | ||
super(url); | ||
} | ||
|
||
@Override | ||
public String getHost() | ||
{ | ||
return HOST; | ||
} | ||
|
||
@Override | ||
public String getDomain() | ||
{ | ||
return DOMAIN; | ||
} | ||
|
||
@Override | ||
public String getGID(URL url) throws MalformedURLException | ||
{ | ||
Pattern pattern = Pattern.compile("(?!https:\\/\\/nsfwalbum.com\\/album\\/)\\d+"); | ||
Matcher matcher = pattern.matcher(url.toExternalForm()); | ||
|
||
if (matcher.find()) | ||
{ | ||
return matcher.group(); | ||
} | ||
|
||
throw new MalformedURLException("Expected https://nsfwalbum.com/album/albumid URL format nsfwalbum.com/album/albumid - got " + url + " instead."); | ||
} | ||
|
||
@Override | ||
public Document getFirstPage() throws IOException | ||
{ | ||
return Http.url(url).get(); | ||
} | ||
|
||
@Override | ||
public List<String> getURLsFromPage(Document doc) | ||
{ | ||
List<String> results = new ArrayList<String>(); | ||
|
||
Elements imgs = doc.select(".album img"); | ||
|
||
System.out.println(imgs.size() + " elements (thumbnails) found."); | ||
|
||
for (Element img : imgs) | ||
{ | ||
String thumbURL = img.attr("data-src"); | ||
String fullResURL = null; | ||
|
||
if (thumbURL.contains("imgspice.com")) | ||
{ | ||
fullResURL = thumbURL.replace("_t.jpg", ".jpg"); | ||
} | ||
else if (thumbURL.contains("imagetwist.com")) | ||
{ | ||
fullResURL = thumbURL.replace("/th/", "/i/"); | ||
} | ||
else if (thumbURL.contains("pixhost.com")) | ||
{ | ||
fullResURL = thumbURL.replace("https://t", "https://img"); | ||
fullResURL = fullResURL.replace("/thumbs/", "/images/"); | ||
} | ||
else if (thumbURL.contains("imx.to")) | ||
{ | ||
fullResURL = thumbURL.replace("/t/", "/i/"); | ||
} | ||
|
||
if (fullResURL != null) | ||
results.add(fullResURL); | ||
} | ||
|
||
return results; | ||
} | ||
|
||
@Override | ||
public void downloadURL(URL url, int index) | ||
{ | ||
addURLToDownload(url); | ||
} | ||
} |