-
Notifications
You must be signed in to change notification settings - Fork 386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix absolutize URL for several cases #861
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -117,9 +117,27 @@ public function get_item_tags(string $namespace, string $tag) | |
return null; | ||
} | ||
|
||
/** | ||
* Get base URL of the item itself. | ||
* Returns `<xml:base>` or feed base URL. | ||
* Similar to `Item::get_base()` but can safely be used during initialisation methods | ||
* such as `Item::get_links()` (`Item::get_base()` and `Item::get_links()` call each-other) | ||
* and is not affected by enclosures. | ||
* | ||
* @param array<string, mixed> $element | ||
* @see get_base | ||
*/ | ||
protected function get_own_base(array $element = []): string | ||
{ | ||
if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) { | ||
return $element['xml_base']; | ||
} | ||
return $this->feed->get_base(); | ||
} | ||
|
||
/** | ||
* Get the base URL value. | ||
* Uses `<xml:base>`, or item link, or feed base URL. | ||
* Uses `<xml:base>`, or item link, or enclosure link, or feed base URL. | ||
* | ||
* @param array<string, mixed> $element | ||
* @return string | ||
|
@@ -812,27 +830,27 @@ public function get_links(string $rel = 'alternate') | |
foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_10, 'link') as $link) { | ||
if (isset($link['attribs']['']['href'])) { | ||
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate'; | ||
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link)); | ||
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link)); | ||
} | ||
} | ||
foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_03, 'link') as $link) { | ||
if (isset($link['attribs']['']['href'])) { | ||
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate'; | ||
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link)); | ||
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link)); | ||
} | ||
} | ||
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_10, 'link')) { | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); | ||
} | ||
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_090, 'link')) { | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); | ||
} | ||
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'link')) { | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); | ||
} | ||
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'guid')) { | ||
if (!isset($links[0]['attribs']['']['isPermaLink']) || strtolower(trim($links[0]['attribs']['']['isPermaLink'])) === 'true') { | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); | ||
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); | ||
} | ||
} | ||
|
||
|
@@ -1199,11 +1217,11 @@ public function get_enclosures() | |
// PLAYER | ||
if ($player_parent = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) { | ||
if (isset($player_parent[0]['attribs']['']['url'])) { | ||
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0])); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There were no tests for this section and I have not added any. Help is welcome if anyone is motivated. |
||
} | ||
} elseif ($player_parent = $parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) { | ||
if (isset($player_parent[0]['attribs']['']['url'])) { | ||
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0])); | ||
} | ||
} | ||
|
||
|
@@ -1323,13 +1341,13 @@ public function get_enclosures() | |
if ($thumbnails = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) { | ||
foreach ($thumbnails as $thumbnail) { | ||
if (isset($thumbnail['attribs']['']['url'])) { | ||
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); | ||
} | ||
} | ||
} elseif ($thumbnails = $parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) { | ||
foreach ($thumbnails as $thumbnail) { | ||
if (isset($thumbnail['attribs']['']['url'])) { | ||
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); | ||
} | ||
} | ||
} | ||
|
@@ -1453,7 +1471,7 @@ public function get_enclosures() | |
if (isset($content['attribs']['']['width'])) { | ||
$width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT); | ||
} | ||
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content)); | ||
|
||
// Checking the other optional media: elements. Priority: media:content, media:group, item, channel | ||
|
||
|
@@ -1712,9 +1730,9 @@ public function get_enclosures() | |
|
||
// PLAYER | ||
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { | ||
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])); | ||
} elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { | ||
$player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])); | ||
} else { | ||
$player = $player_parent; | ||
} | ||
|
@@ -1804,14 +1822,14 @@ public function get_enclosures() | |
// THUMBNAILS | ||
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { | ||
foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { | ||
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); | ||
} | ||
if (is_array($thumbnails)) { | ||
$thumbnails = array_values(array_unique($thumbnails)); | ||
} | ||
} elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { | ||
foreach ($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { | ||
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); | ||
} | ||
if (is_array($thumbnails)) { | ||
$thumbnails = array_values(array_unique($thumbnails)); | ||
|
@@ -1909,7 +1927,7 @@ public function get_enclosures() | |
$width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT); | ||
} | ||
if (isset($content['attribs']['']['url'])) { | ||
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content)); | ||
} | ||
// Checking the other optional media: elements. Priority: media:content, media:group, item, channel | ||
|
||
|
@@ -2064,7 +2082,7 @@ public function get_enclosures() | |
// PLAYER | ||
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { | ||
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'])) { | ||
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0])); | ||
} | ||
} else { | ||
$player = $player_parent; | ||
|
@@ -2120,7 +2138,7 @@ public function get_enclosures() | |
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { | ||
foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { | ||
if (isset($thumbnail['attribs']['']['url'])) { | ||
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); | ||
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); | ||
} | ||
} | ||
if (is_array($thumbnails)) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2456,8 +2456,9 @@ public function get_image_tags(string $namespace, string $tag) | |
/** | ||
* Get the base URL value from the feed | ||
* | ||
* Uses `<xml:base>` if available, otherwise uses the first link in the | ||
* feed, or failing that, the URL of the feed itself. | ||
* Uses `<xml:base>` if available, | ||
* otherwise uses the first 'self' link or the first 'alternate' link of the feed, | ||
* or failing that, the URL of the feed itself. | ||
Comment on lines
+2459
to
+2461
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Original RSS specification requires URLs to include scheme. I would expect that if the feed has relative URLs the content is taken from the HTML (alternate) version unchanged, and so the links should be resolved relative to that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is also reflected in the previous definition, as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Though I guess There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like https://www.rssboard.org/news/151/relative-links discusses this and recommends And for completeness Atom only seems to mention
And, as mentioned in one of the comments on the RSS article, the
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just to follow up on this: it looks like we agree, right? In other words, there does not seem to be any (new) test that contradicts this. |
||
* | ||
* @see get_link | ||
* @see subscribe_url | ||
|
@@ -2469,8 +2470,12 @@ public function get_base(array $element = []) | |
{ | ||
if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) { | ||
return $element['xml_base']; | ||
} elseif ($this->get_link() !== null) { | ||
return $this->get_link(); | ||
} | ||
if (($link = $this->get_link(0, 'self')) !== null) { | ||
return $link; | ||
} | ||
if (($link = $this->get_link(0, 'alternate')) !== null) { | ||
return $link; | ||
} | ||
|
||
return $this->subscribe_url() ?? ''; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,6 +88,38 @@ public static function getLinkProvider(): iterable | |
, | ||
'http://example.net/link?a=%22b%22&c=%3Cd%3E', | ||
]; | ||
|
||
yield 'Test RSS 2.0 with channel link and enclosure' => [ | ||
<<<XML | ||
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"> | ||
<channel> | ||
<link>http://example.net/tests/</link> | ||
<item> | ||
<link>/tests/3/</link> | ||
<media:content url="/images/3.jpg" medium="image"></media:content> | ||
</item> | ||
</channel> | ||
</rss> | ||
XML | ||
, | ||
'http://example.net/images/3.jpg', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was wrongly returning |
||
]; | ||
Comment on lines
+92
to
+106
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was the original bug I faced, which led me to investigate the issue (which turned out to be more severe and complex than anticipated...) |
||
|
||
yield 'Test RSS 2.0 with Atom channel link and enclosure' => [ | ||
<<<XML | ||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> | ||
<channel> | ||
<atom:link href="http://example.net/tests/" rel="self" type="application/rss+xml" /> | ||
<item> | ||
<link>/tests/4/</link> | ||
<media:content url="/images/4.jpg" medium="image"></media:content> | ||
</item> | ||
</channel> | ||
</rss> | ||
XML | ||
, | ||
'http://example.net/images/4.jpg', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was wrongly returning |
||
]; | ||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3262,6 +3262,50 @@ public static function getPermalinkDataProvider(): array | |
, | ||
'http://example.com/', | ||
], | ||
'Test RSS 2.0 with channel link and enclosure from another domain' => [ | ||
<<<XML | ||
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"> | ||
<channel> | ||
<link>http://example.net/tests/</link> | ||
<item> | ||
<link>/tests/1/</link> | ||
<media:content url="http://example.com/images/1.jpg" medium="image"></media:content> | ||
</item> | ||
</channel> | ||
</rss> | ||
XML | ||
, | ||
'http://example.net/tests/1/', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was wrongly returning |
||
], | ||
'Test RSS 2.0 with Atom channel link and relative enclosure' => [ | ||
<<<XML | ||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> | ||
<channel> | ||
<atom:link href="http://example.net/tests/" rel="self" type="application/rss+xml" /> | ||
<item> | ||
<link>/tests/2/</link> | ||
<media:content url="/images/2.jpg" medium="image"></media:content> | ||
</item> | ||
</channel> | ||
</rss> | ||
XML | ||
, | ||
'http://example.net/tests/2/', | ||
], | ||
'Test RSS 2.0 with xml:base and enclosure from another domain' => [ | ||
<<<XML | ||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> | ||
<channel> | ||
<item> | ||
<link xml:base="http://example.net/tests/">/tests/3/</link> | ||
<media:content url="http://example.com/images/3.jpg" medium="image"></media:content> | ||
</item> | ||
</channel> | ||
</rss> | ||
XML | ||
, | ||
'http://example.net/tests/3/', | ||
], | ||
'Test Atom 1.0 xmlbase 1' => [ | ||
<<<EOT | ||
<feed xmlns="http://www.w3.org/2005/Atom" xml:base="http://example.com/"> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apparently, `xml:base` itself should be resolved recursively relative to `xml:base` in parent elements. Thankfully, this appears to be handled by our own `Parser` class.