diff --git a/frozen_soup/__init__.py b/frozen_soup/__init__.py
index 543e161..f252f7a 100644
--- a/frozen_soup/__init__.py
+++ b/frozen_soup/__init__.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Optional, Union, List
import requests
@@ -15,6 +15,7 @@ def freeze_to_string(
session: Optional[requests.Session] = None,
timeout: Union[float, tuple[float, float], None] = 900.0,
formatter: str = 'html5',
+ knockouts: Optional[List[str]] = None,
) -> str:
if session is None:
session = requests.Session()
@@ -23,6 +24,12 @@ def freeze_to_string(
soup = BeautifulSoup(r.text, 'html.parser')
+    # Remove knocked-out elements first so no later step spends work on them
+    if knockouts is not None:
+        for selector in knockouts:
+            for tag in soup.select(selector):  # .select() also works on bs4 < 4.12, unlike .css
+                tag.decompose()
+
base_url = url
# Find the first , which could follow a
diff --git a/frozen_soup/__main__.py b/frozen_soup/__main__.py
index 6c553a9..2d6b25b 100644
--- a/frozen_soup/__main__.py
+++ b/frozen_soup/__main__.py
@@ -29,18 +29,27 @@ def main() -> int:
'-T', '--timeout',
type=float,
default=900.0,
+ metavar= 'SECONDS',
help='default connect and read timeout in seconds'
)
parser.add_argument(
'--connect-timeout',
type=float,
+ metavar= 'SECONDS',
help='default connect timeout in seconds (will override --timeout)'
)
parser.add_argument(
'--read-timeout',
type=float,
+ metavar= 'SECONDS',
help='default read timeout in seconds (will override --timeout)'
)
+ parser.add_argument(
+ '--knockout',
+ action= 'append',
+ metavar= 'SELECTOR',
+ help='knock out elements matching the given CSS selector'
+ )
args = parser.parse_args()
@@ -48,7 +57,7 @@ def main() -> int:
if (args.connect_timeout or args.read_timeout):
timeout = (args.connect_timeout or timeout, args.read_timeout or timeout)
- print(freeze_to_string(args.url, timeout=timeout))
+ print(freeze_to_string(args.url, timeout=timeout, knockouts=args.knockout))
return 0
diff --git a/tests/test_knockout.py b/tests/test_knockout.py
new file mode 100644
index 0000000..7e94252
--- /dev/null
+++ b/tests/test_knockout.py
@@ -0,0 +1,47 @@
+import pytest
+
+import requests
+from requests_testadapter import TestAdapter, TestSession
+
+from frozen_soup import freeze_to_string
+
+@pytest.fixture
+def session() -> requests.Session:
+ s = TestSession()
+
+ s.mount("http://test/content", TestAdapter(
+ b'/* WONTON */',
+ headers= { 'Content-type' : 'text/plain' }
+ ))
+
+ s.mount(
+ "http://test/html",
+ TestAdapter(b'pow!
')
+ )
+ s.mount(
+ "http://test/multiple",
+ TestAdapter(b'pow!bang!
')
+ )
+ s.mount(
+ "http://test/bad-img",
+ TestAdapter(b'pow!
')
+ )
+
+ return s
+
+def test_knockout(session):
+ out = freeze_to_string('http://test/html', session, knockouts=['.ko'])
+ assert out == '
'
+
+def test_knockout_multiple_elements(session):
+ out = freeze_to_string('http://test/multiple', session, knockouts=['.ko'])
+ assert out == '
'
+
+def test_knockout_multiple_selectors(session):
+ out = freeze_to_string('http://test/multiple', session, knockouts=['i', 'b'])
+ assert out == '
'
+
+# if the knockout doesn't kill the <img> we'll get an exception
+def test_knockout_img(session):
+ out = freeze_to_string('http://test/bad-img', session, knockouts=['img'])
+ assert out == 'pow!'