Skip to content

Commit

Permalink
Merge pull request #310924 from fabaff/trafilatura-bump
Browse files (browse the repository at this point in the history)
python312Packages.courlan: 1.0.0 -> 1.1.0, python311Packages.trafilatura: 1.8.1 -> 1.9.0
  • Loading branch information
fabaff committed May 12, 2024
2 parents f18b569 + 60bc630 commit 17838a4
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 65 deletions.
56 changes: 29 additions & 27 deletions pkgs/development/python-modules/courlan/default.nix
Original file line number Diff line number Diff line change
@@ -1,56 +1,58 @@
{ lib
, buildPythonPackage
, fetchPypi
, langcodes
, pytestCheckHook
, tld
, urllib3
, pythonOlder
{
lib,
babel,
buildPythonPackage,
fetchPypi,
langcodes,
pytestCheckHook,
pythonOlder,
setuptools,
tld,
urllib3,
}:

buildPythonPackage rec {
pname = "courlan";
version = "1.0.0";
format = "setuptools";
version = "1.1.0";
pyproject = true;

disabled = pythonOlder "3.6";

src = fetchPypi {
inherit pname version;
hash = "sha256-PDVRHDZSXLL5Qc1nCbejp0LtlfC55WyX7sDBb9wDUYM=";
hash = "sha256-1wZoQzTxi+StofvVfyaArfADZkj22ECFL3pIItOt/Y0=";
};

propagatedBuildInputs = [
# Tests try to write to /tmp directly. use $TMPDIR instead.
postPatch = ''
substituteInPlace tests/unit_tests.py \
--replace-fail "\"courlan --help\"" "\"$out/bin/courlan --help\"" \
--replace-fail "courlan_bin = \"courlan\"" "courlan_bin = \"$out/bin/courlan\"" \
--replace-fail "/tmp" "$TMPDIR"
'';

build-system = [ setuptools ];

dependencies = [
babel
langcodes
tld
urllib3
];

nativeCheckInputs = [
pytestCheckHook
];
nativeCheckInputs = [ pytestCheckHook ];

# disable tests that require an internet connection
disabledTests = [
"test_urlcheck"
];

# tests try to write to /tmp directly. use $TMPDIR instead.
postPatch = ''
substituteInPlace tests/unit_tests.py \
--replace "\"courlan --help\"" "\"$out/bin/courlan --help\"" \
--replace "courlan_bin = \"courlan\"" "courlan_bin = \"$out/bin/courlan\"" \
--replace "/tmp" "$TMPDIR"
'';
disabledTests = [ "test_urlcheck" ];

pythonImportsCheck = [ "courlan" ];

meta = with lib; {
description = "Clean, filter and sample URLs to optimize data collection";
mainProgram = "courlan";
homepage = "https://github.com/adbar/courlan";
changelog = "https://github.com/adbar/courlan/blob/v${version}/HISTORY.md";
license = licenses.asl20;
maintainers = with maintainers; [ jokatzke ];
mainProgram = "courlan";
};
}
72 changes: 34 additions & 38 deletions pkgs/development/python-modules/trafilatura/default.nix
Original file line number Diff line number Diff line change
@@ -1,35 +1,43 @@
{ lib
, buildPythonPackage
, fetchPypi
, pytestCheckHook
, pythonOlder
, certifi
, charset-normalizer
, courlan
, htmldate
, justext
, lxml
, urllib3
, setuptools
{
lib,
buildPythonPackage,
certifi,
charset-normalizer,
courlan,
fetchPypi,
htmldate,
justext,
lxml,
pytestCheckHook,
pythonOlder,
setuptools,
urllib3,
}:

buildPythonPackage rec {
pname = "trafilatura";
version = "1.8.1";
version = "1.9.0";
pyproject = true;

disabled = pythonOlder "3.9";

src = fetchPypi {
inherit pname version;
hash = "sha256-a4eN/b1cXftV0Pgwfyt9wVrDRYBU90hh/5ihcvXjhyA=";
hash = "sha256-5oM9KauKE+2FOTfXyR5oaLxi774QIUrCsQZDbdI9FBI=";
};

nativeBuildInputs = [
setuptools
];
# Patch out gui cli because it is not supported in this packaging and
# nixify path to the trafilatura binary in the test suite
postPatch = ''
substituteInPlace setup.py \
--replace-fail '"trafilatura_gui=trafilatura.gui:main",' ""
substituteInPlace tests/cli_tests.py \
--replace-fail "trafilatura_bin = 'trafilatura'" "trafilatura_bin = '$out/bin/trafilatura'"
'';

build-system = [ setuptools ];

propagatedBuildInputs = [
dependencies = [
certifi
charset-normalizer
courlan
@@ -39,34 +47,22 @@ buildPythonPackage rec {
urllib3
];

nativeCheckInputs = [
pytestCheckHook
];
nativeCheckInputs = [ pytestCheckHook ];

disabledTests = [
# Disable tests that require an internet connection
"test_cli_pipeline"
"test_crawl_page"
"test_download"
"test_fetch"
"test_redirection"
"test_meta_redirections"
"test_crawl_page"
"test_whole"
"test_probing"
"test_cli_pipeline"
"test_queue"
"test_redirection"
"test_whole"
];

# patch out gui cli because it is not supported in this packaging
# nixify path to the trafilatura binary in the test suite
postPatch = ''
substituteInPlace setup.py \
--replace-fail '"trafilatura_gui=trafilatura.gui:main",' ""
substituteInPlace tests/cli_tests.py \
--replace-fail "trafilatura_bin = 'trafilatura'" "trafilatura_bin = '$out/bin/trafilatura'"
'';

pythonImportsCheck = [
"trafilatura"
];
pythonImportsCheck = [ "trafilatura" ];

meta = with lib; {
description = "Python package and command-line tool designed to gather text on the Web";
Expand Down

0 comments on commit 17838a4

Please sign in to comment.