Skip to content

Commit

Permalink
Fix SimplePie URL absolutization in several cases
Browse files Browse the repository at this point in the history
This is especially relevant for HTML+XPath mode, which relies on relative URLs being properly absolutized (resolved against the correct base URL).

Upstream PR simplepie/simplepie#861
  • Loading branch information
Alkarex committed Apr 6, 2024
1 parent 1c684a9 commit 01f520a
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 27 deletions.
18 changes: 10 additions & 8 deletions lib/SimplePie/SimplePie.php
Original file line number Diff line number Diff line change
Expand Up @@ -2270,8 +2270,9 @@ public function get_image_tags($namespace, $tag)
/**
* Get the base URL value from the feed
*
* Uses `<xml:base>` if available, otherwise uses the first link in the
* feed, or failing that, the URL of the feed itself.
* Uses `<xml:base>` if available,
* otherwise uses the first 'self' link or the first 'alternate' link of the feed,
* or failing that, the URL of the feed itself.
*
* @see get_link
* @see subscribe_url
Expand All @@ -2281,16 +2282,17 @@ public function get_image_tags($namespace, $tag)
*/
public function get_base($element = array())
{
if (!empty($element['xml_base_explicit']) && isset($element['xml_base']))
{
if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) {
return $element['xml_base'];
}
elseif ($this->get_link() !== null)
{
return $this->get_link();
if (($link = $this->get_link(0, 'self')) !== null) {
return $link;
}
if (($link = $this->get_link(0, 'alternate')) !== null) {
return $link;
}

return $this->subscribe_url();
return $this->subscribe_url() ?? '';
}

/**
Expand Down
56 changes: 37 additions & 19 deletions lib/SimplePie/SimplePie/Item.php
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,27 @@ public function get_item_tags($namespace, $tag)
return null;
}

/**
 * Resolve the base URL that belongs to the item itself.
 *
 * When the given element carries an explicit `<xml:base>`, that value is
 * returned; otherwise the base URL of the parent feed is used.
 *
 * This is a restricted counterpart of `get_base()`: it never consults the
 * item's own links or enclosures, so it can be called safely from
 * initialisation methods such as `get_links()` (`get_base()` and
 * `get_links()` call each other).
 *
 * @param array<string, mixed> $element Parsed element whose `xml_base` /
 *                                      `xml_base_explicit` entries are inspected.
 * @return string The element's explicit base URL, or the feed's base URL.
 * @see get_base
 */
protected function get_own_base(array $element = []): string
{
    // Only an explicitly declared xml:base counts; an inherited one is ignored.
    $has_explicit_base = !empty($element['xml_base_explicit']) && isset($element['xml_base']);

    return $has_explicit_base ? $element['xml_base'] : $this->feed->get_base();
}

/**
* Get the base URL value.
* Uses `<xml:base>`, or item link, or feed base URL.
* Uses `<xml:base>`, or item link, or enclosure link, or feed base URL.
*
* @param array $element
* @return string
Expand Down Expand Up @@ -1000,7 +1018,7 @@ public function get_links($rel = 'alternate')
if (isset($link['attribs']['']['href']))
{
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_own_base($link));

}
}
Expand All @@ -1009,26 +1027,26 @@ public function get_links($rel = 'alternate')
if (isset($link['attribs']['']['href']))
{
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_own_base($link));
}
}
if ($links = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
if ($links = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
if ($links = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
if ($links = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'guid'))
{
if (!isset($links[0]['attribs']['']['isPermaLink']) || strtolower(trim($links[0]['attribs']['']['isPermaLink'])) === 'true')
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
}

Expand Down Expand Up @@ -1505,14 +1523,14 @@ public function get_enclosures()
{
if (isset($player_parent[0]['attribs']['']['url']))
{
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($player_parent[0]));
}
}
elseif ($player_parent = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'player'))
{
if (isset($player_parent[0]['attribs']['']['url']))
{
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($player_parent[0]));
}
}

Expand Down Expand Up @@ -1679,7 +1697,7 @@ public function get_enclosures()
{
if (isset($thumbnail['attribs']['']['url']))
{
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($thumbnail));
}
}
}
Expand All @@ -1689,7 +1707,7 @@ public function get_enclosures()
{
if (isset($thumbnail['attribs']['']['url']))
{
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($thumbnail));
}
}
}
Expand Down Expand Up @@ -1836,7 +1854,7 @@ public function get_enclosures()
{
$width = $this->sanitize($content['attribs']['']['width'], SIMPLEPIE_CONSTRUCT_TEXT);
}
$url = $this->sanitize($content['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$url = $this->sanitize($content['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($content));

// Checking the other optional media: elements. Priority: media:content, media:group, item, channel

Expand Down Expand Up @@ -2195,11 +2213,11 @@ public function get_enclosures()
// PLAYER
if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player']))
{
$player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
// Resolve the URL against the base of the <media:player> element itself (index 0), not the list of player elements, consistent with the other get_base() call sites.
$player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]));
}
elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player']))
{
$player = $this->sanitize($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
// Resolve the URL against the base of the group's <media:player> element itself (index 0), not the list of player elements, consistent with the other get_base() call sites.
$player = $this->sanitize($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]));
}
else
{
Expand Down Expand Up @@ -2325,7 +2343,7 @@ public function get_enclosures()
{
foreach ($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail)
{
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($thumbnail));
}
if (is_array($thumbnails))
{
Expand All @@ -2336,7 +2354,7 @@ public function get_enclosures()
{
foreach ($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail)
{
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($thumbnail));
}
if (is_array($thumbnails))
{
Expand Down Expand Up @@ -2460,7 +2478,7 @@ public function get_enclosures()
}
if (isset($content['attribs']['']['url']))
{
$url = $this->sanitize($content['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$url = $this->sanitize($content['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($content));
}
// Checking the other optional media: elements. Priority: media:content, media:group, item, channel

Expand Down Expand Up @@ -2673,7 +2691,7 @@ public function get_enclosures()
if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player']))
{
if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'])) {
$player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]));
}
}
else
Expand Down Expand Up @@ -2750,7 +2768,7 @@ public function get_enclosures()
foreach ($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail)
{
if (isset($thumbnail['attribs']['']['url'])) {
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($thumbnail));
}
}
if (is_array($thumbnails))
Expand Down

0 comments on commit 01f520a

Please sign in to comment.