Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace Locator and Content\Type\Sniffer with Content\Detector #849

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
52 changes: 52 additions & 0 deletions src/Content/Detector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
// SPDX-License-Identifier: BSD-3-Clause

declare(strict_types=1);

namespace SimplePie\Content;

use SimplePie\HTTP\Response;
use SimplePie\SimplePie;

/**
 * Contract for feed auto-discovery and media type sniffing
 *
 * This interface replaces
 * - \SimplePie\Locator and
 * - \SimplePie\Content\Type\Sniffer
 */
interface Detector
{
    /**
     * Collect the urls in an HTML response that may point to a feed
     *
     * Inspired by the Ultra-liberal RSS locator from Mark Pilgrim.
     *
     * @see https://simplepie.org/wiki/reference/simplepie/set_autodiscovery_level
     * @link http://web.archive.org/web/20110607232437/http://diveintomark.org/archives/2002/08/15/ultraliberal_rss_locator
     *
     * @param Response $response Response to search for feed urls
     * @param SimplePie::LOCATOR_* $discovery_level Which discovery strategies may be used
     *
     * @return string[] Candidate feed urls. The urls are not requested or checked for containing a feed
     */
    public function discover_possible_feed_urls(
        Response $response,
        int $discovery_level = SimplePie::LOCATOR_ALL
    ): array;

    /**
     * Tell whether the response itself contains a feed
     *
     * @param Response $response Response to inspect
     *
     * @return bool
     */
    public function contains_feed(Response $response): bool;

    /**
     * Determine the IANA Media-Type of the provided response
     *
     * @link https://www.iana.org/assignments/media-types/media-types.xhtml
     *
     * @param Response $response Response to inspect
     *
     * @return string Actual Media-Type
     */
    public function detect_media_type(Response $response): string;
}
119 changes: 119 additions & 0 deletions src/Content/VerifiedFeedsDetector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
<?php

// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
// SPDX-License-Identifier: BSD-3-Clause

declare(strict_types=1);

namespace SimplePie\Content;

use SimplePie\Content\Type\Sniffer;
use SimplePie\File;
use SimplePie\HTTP\Response;
use SimplePie\Locator;
use SimplePie\Registry;
use SimplePie\RegistryAware;
use SimplePie\SimplePie;

/**
 * BC helper for feed auto-discovery and type sniffing
 *
 * Implements Detector by delegating to the legacy classes
 * - \SimplePie\Locator and
 * - \SimplePie\Content\Type\Sniffer
 * which are created through the registry on every call.
 *
 * @internal
 */
final class VerifiedFeedsDetector implements Detector, RegistryAware
{
    /**
     * Registry used to create the Locator and Sniffer instances
     *
     * @var Registry
     */
    private $registry;

    /**
     * Store the registry that later creates the Locator and Sniffer instances
     *
     * Must be called before any of the Detector methods are used.
     *
     * @return void
     */
    public function set_registry(Registry $registry): void
    {
        $this->registry = $registry;
    }

    /**
     * Discover possible feed urls from HTML response
     *
     * Delegates to Locator::find() and returns at most one url: the
     * permanent URI of the response that find() returned.
     *
     * @see https://simplepie.org/wiki/reference/simplepie/set_autodiscovery_level
     *
     * Inspired by the Ultra-liberal RSS locator from Mark Pilgrim
     * @link http://web.archive.org/web/20110607232437/http://diveintomark.org/archives/2002/08/15/ultraliberal_rss_locator
     *
     * @param SimplePie::LOCATOR_* $discovery_level
     *
     * @return string[] Empty array when nothing was found, otherwise a single feed url
     */
    public function discover_possible_feed_urls(Response $response, int $discovery_level = SimplePie::LOCATOR_ALL): array
    {
        $locator = $this->create_locator($response);

        // NOTE(review): Locator::find() presumably fills this array by
        // reference with every feed it found. Only the returned match is used
        // here - confirm whether the remaining entries should be returned too.
        $all_discovered_feeds = [];

        /** @var File|null */
        $result = $locator->find($discovery_level, $all_discovered_feeds);

        // instanceof is already false for null, no separate is_object() check needed.
        if ($result instanceof Response) {
            return [$result->get_permanent_uri()];
        }

        return [];
    }

    /**
     * Check if the response contains a feed
     *
     * Delegates to Locator::is_feed() with the response as passed in.
     *
     * @return bool
     */
    public function contains_feed(Response $response): bool
    {
        return (bool) $this->create_locator($response)->is_feed($response, false);
    }

    /**
     * Get the IANA Media-Type of the provided response
     *
     * Delegates to Sniffer::get_type().
     *
     * @link https://www.iana.org/assignments/media-types/media-types.xhtml
     *
     * @return string Actual Media-Type
     */
    public function detect_media_type(Response $response): string
    {
        /** @var Sniffer */
        $sniffer = $this->registry->create(
            Sniffer::class,
            [$response]
        );

        return (string) $sniffer->get_type();
    }

    /**
     * Create a Locator for the response through the registry
     *
     * A response that is not already a File is converted with
     * File::fromResponse() before it is handed to the Locator.
     *
     * @return Locator
     */
    private function create_locator(Response $response): Locator
    {
        $file = $response instanceof File ? $response : File::fromResponse($response);

        // NOTE(review): the positional values presumably map to the Locator
        // constructor (timeout, user agent, max checked feeds, force
        // fsockopen, cURL options) - confirm against Locator::__construct().
        /** @var Locator */
        $locator = $this->registry->create(
            Locator::class,
            [
                $file,
                10,
                null,
                10,
                false,
                []
            ]
        );

        return $locator;
    }
}
3 changes: 3 additions & 0 deletions src/Registry.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

namespace SimplePie;

use SimplePie\Content\Detector;
use SimplePie\Content\Type\Sniffer;
use SimplePie\Content\VerifiedFeedsDetector;
use SimplePie\Parse\Date;
use SimplePie\XML\Declaration\Parser as DeclarationParser;

Expand Down Expand Up @@ -45,6 +47,7 @@ class Registry
Misc::class => Misc::class,
DeclarationParser::class => DeclarationParser::class,
Date::class => Date::class,
Detector::class => VerifiedFeedsDetector::class,
];

/**
Expand Down
49 changes: 45 additions & 4 deletions src/SimplePie.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use SimplePie\Cache\DataCache;
use SimplePie\Cache\NameFilter;
use SimplePie\Cache\Psr16;
use SimplePie\Content\Detector;
use SimplePie\Content\Type\Sniffer;
use SimplePie\Exception as SimplePieException;
use SimplePie\Exception\HttpException;
Expand Down Expand Up @@ -2008,7 +2009,10 @@ protected function fetch_data(&$cache)
);
}

if (!$locate->is_feed($file)) {
/** @var Detector */
$detector = $this->registry->create(Detector::class);

if (! $detector->contains_feed($file)) {
$copyStatusCode = $file->get_status_code();
$copyContentType = $file->get_header_line('content-type');
try {
Expand All @@ -2026,10 +2030,47 @@ protected function fetch_data(&$cache)
}
// Now also do feed discovery, but if microformats were found don't
// overwrite the current value of file.
$discovered = $locate->find(
$this->autodiscovery,
$this->all_discovered_feeds
$possible_feed_urls = $detector->discover_possible_feed_urls(
$file,
$this->autodiscovery
);

$discovered = null;
$checked_feeds = 0;

foreach ($possible_feed_urls as $href) {
$checked_feeds++;

try {
//code...
$possible_feed = $http_client->request(
Client::METHOD_GET,
$href,
['Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER]
);
} catch (HttpException $th) {
continue;
}

if (
(
preg_match('/^http(s)?:\/\//i', $href)
|| (
$possible_feed->get_status_code() === 200
|| $possible_feed->get_status_code() > 206
&& $possible_feed->get_status_code() < 300
)
)
&& $detector->contains_feed($possible_feed)
) {
$this->all_discovered_feeds[$href] = $possible_feed;
}
}

if (! empty($this->all_discovered_feeds)) {
$discovered = $this->all_discovered_feeds[0];
}

if ($microformats) {
if ($hub = $locate->get_rel_link('hub')) {
$self = $locate->get_rel_link('self');
Expand Down