Skip to content

Commit

Permalink
[core] Add urljoin (#756)
Browse files Browse the repository at this point in the history
Adds php-urljoin from https://github.com/fluffy-critter/php-urljoin to replace the custom implementation of 'defaultLinkTo'
  • Loading branch information
em92 authored and logmanoriginal committed Aug 2, 2018
1 parent 9d0452d commit df58f5b
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 9 deletions.
11 changes: 11 additions & 0 deletions lib/RssBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@
}
require_once $vendorLibSimpleHtmlDom;

// Load the vendored php-urljoin script (vendor/php-urljoin/src/urljoin.php).
// If the file is absent, abort with an HttpException (code 500) whose message
// tells the administrator where to download it and where to place it.
$vendorLibPhpUrlJoin = __DIR__ . PATH_VENDOR . '/php-urljoin/src/urljoin.php';
if(!file_exists($vendorLibPhpUrlJoin)) {
throw new \HttpException('"php-urljoin" library is missing.
Get it from https://github.com/fluffy-critter/php-urljoin and place the script "urljoin.php" in '
// NOTE(review): substr(..., 4) drops the first four characters of PATH_VENDOR,
// presumably a relative prefix that should not be shown to the user - confirm
// against the definition of PATH_VENDOR.
. substr(PATH_VENDOR, 4)
. '/php-urljoin/src/',
500);
}
require_once $vendorLibPhpUrlJoin;


/* Example use
require_once __DIR__ . '/lib/RssBridge.php';
Expand Down
11 changes: 2 additions & 9 deletions lib/html.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,11 @@ function backgroundToImg($htmlContent) {

function defaultLinkTo($content, $server){
foreach($content->find('img') as $image) {
if(strpos($image->src, 'http') === false
&& strpos($image->src, '//') === false
&& strpos($image->src, 'data:') === false)
$image->src = $server . $image->src;
$image->src = urljoin($server, $image->src);
}

foreach($content->find('a') as $anchor) {
if(strpos($anchor->href, 'http') === false
&& strpos($anchor->href, '//') === false
&& strpos($anchor->href, '#') !== 0
&& strpos($anchor->href, '?') !== 0)
$anchor->href = $server . $anchor->href;
$anchor->href = urljoin($server, $anchor->href);
}

return $content;
Expand Down
131 changes: 131 additions & 0 deletions vendor/php-urljoin/src/urljoin.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
<?php

/*
A spiritual port of Python's urlparse.urljoin() function to PHP. Why this isn't in the standard library is anyone's guess.
Author: fluffy, http://beesbuzz.biz/
Latest version at: https://github.com/plaidfluff/php-urljoin
*/

/**
 * Resolve a URL reference against a base URL, in the spirit of Python's
 * urlparse.urljoin().
 *
 * Behaviour summary:
 *  - An empty base yields $rel unchanged; an empty reference yields $base.
 *  - A reference with a scheme that differs from the base's scheme, or with a
 *    scheme that does not support relative resolution (e.g. "data", "mailto"),
 *    is returned unchanged.
 *  - A reference carrying its own host replaces the base's authority, path
 *    and query.
 *  - Relative paths are resolved against the directory of the base path and
 *    "." / ".." segments are collapsed. ".." segments that would climb above
 *    the root are kept in the result rather than dropped.
 *  - A reference consisting only of a fragment keeps the base's query.
 *  - If either argument cannot be parsed by parse_url(), $rel is returned
 *    unchanged.
 *
 * @param string $base Base URL (normally absolute).
 * @param string $rel  URL reference to resolve against $base.
 * @return string The resolved URL.
 */
function urljoin($base, $rel) {
    if (!$base) {
        return $rel;
    }

    // The string '0' is falsy in PHP but is a perfectly valid relative path,
    // so it must not be mistaken for "no reference given".
    if (!$rel && $rel !== '0') {
        return $base;
    }

    $uses_relative = array('', 'ftp', 'http', 'gopher', 'nntp', 'imap',
        'wais', 'file', 'https', 'shttp', 'mms',
        'prospero', 'rtsp', 'rtspu', 'sftp',
        'svn', 'svn+ssh', 'ws', 'wss');

    $pbase = parse_url($base);
    $prel = parse_url($rel);

    // parse_url() returns false on seriously malformed URLs; there is nothing
    // sensible to merge in that case, so hand the reference back untouched.
    if ($pbase === false || $prel === false) {
        return $rel;
    }

    // A bare root path on the base carries no directory information.
    if (array_key_exists('path', $pbase) && $pbase['path'] === '/') {
        unset($pbase['path']);
    }

    if (isset($prel['scheme'])) {
        // The base may legitimately lack a scheme (e.g. '/some/path').
        $baseScheme = isset($pbase['scheme']) ? $pbase['scheme'] : null;
        if ($prel['scheme'] !== $baseScheme || !in_array($prel['scheme'], $uses_relative, true)) {
            return $rel;
        }
    }

    // A reference with its own authority does not inherit the base's path or
    // query (RFC 3986, section 5.2.2).
    if (isset($prel['host'])) {
        unset($pbase['path'], $pbase['query']);
    }

    $merged = array_merge($pbase, $prel);

    // Handle relative paths:
    //   'path/to/file.ext'
    //   './path/to/file.ext'
    if (array_key_exists('path', $prel) && substr($prel['path'], 0, 1) !== '/') {

        // Normalize: './path/to/file.ext' => 'path/to/file.ext'
        if (substr($prel['path'], 0, 2) === './') {
            $prel['path'] = substr($prel['path'], 2);
        }

        if (array_key_exists('path', $pbase)) {
            // Replace the last segment of the base path with the reference.
            $dir = preg_replace('@/[^/]*$@', '', $pbase['path']);
            $merged['path'] = $dir . '/' . $prel['path'];
        } else {
            $merged['path'] = '/' . $prel['path'];
        }
    }

    if (array_key_exists('path', $merged)) {
        // Get the path components, and remove the initial empty one
        $pathParts = explode('/', $merged['path']);
        array_shift($pathParts);

        $path = array();
        $prevPart = '';
        foreach ($pathParts as $part) {
            if ($part === '..' && count($path) > 0) {
                // Cancel out the parent directory (if there's a parent to cancel)
                $parent = array_pop($path);
                // But if it was also a parent directory, leave it in
                if ($parent === '..') {
                    array_push($path, $parent);
                    array_push($path, $part);
                }
            } elseif ($prevPart !== '' || ($part !== '.' && $part !== '')) {
                // Don't include empty or current-directory components
                if ($part === '.') {
                    $part = '';
                }
                array_push($path, $part);
            }
            $prevPart = $part;
        }
        $merged['path'] = '/' . implode('/', $path);
    }

    $ret = '';
    if (isset($merged['scheme'])) {
        $ret .= $merged['scheme'] . ':';
    }

    if (isset($merged['scheme']) || isset($merged['host'])) {
        $ret .= '//';
    }

    // username, password, and port are associated with the hostname, not merged
    $hostSource = isset($prel['host']) ? $prel : $pbase;
    if (isset($hostSource['host'])) {
        if (isset($hostSource['user'])) {
            $ret .= $hostSource['user'];
            if (isset($hostSource['pass'])) {
                $ret .= ':' . $hostSource['pass'];
            }
            $ret .= '@';
        }
        $ret .= $hostSource['host'];
        if (isset($hostSource['port'])) {
            $ret .= ':' . $hostSource['port'];
        }
    }

    if (isset($merged['path'])) {
        $ret .= $merged['path'];
    }

    if (isset($prel['query'])) {
        $ret .= '?' . $prel['query'];
    } elseif (!isset($prel['path']) && !isset($prel['host']) && isset($pbase['query'])) {
        // A reference with neither path nor query (e.g. '#top') still points
        // at the base document, so the base query must be preserved.
        $ret .= '?' . $pbase['query'];
    }

    if (isset($prel['fragment'])) {
        $ret .= '#' . $prel['fragment'];
    }

    return $ret;
}

0 comments on commit df58f5b

Please sign in to comment.