From d345d9d3d46e7840037d41a2b0dbfb376305872c Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 24 Apr 2024 17:35:30 -0400 Subject: [PATCH] more small improvements to docs --- README.md | 14 +++--- lib/nokogiri/html5/inference.rb | 83 +++++++++++++++++---------------- 2 files changed, 50 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index be0077f..2c75fa3 100644 --- a/README.md +++ b/README.md @@ -23,23 +23,25 @@ For example: ``` ruby Nokogiri::HTML5::DocumentFragment.parse("<td>foo</td>").to_html -# => "foo" +# => "foo" # where did the tag go!? ``` In the default "in body" mode, the parser will log an error, "Start tag 'td' isn't allowed here", -and drop the tag. This fragment must be parsed "in the context" of a table in order to parse -properly. Thankfully, libgumbo and Nokogiri allow us to do this: +and drop the tag. This particular fragment must be parsed "in the context" of a table in order to +parse properly. + +Thankfully, libgumbo and Nokogiri allow us to set the context node: ``` ruby Nokogiri::HTML5::DocumentFragment.new( Nokogiri::HTML5::Document.new, "<td>foo</td>", - "table" # this is the context node + "table" # <--- this is the context node ).to_html # => "<tbody><tr><td>foo</td></tr></tbody>" ``` -This is _almost_ correct, but we're seeing another HTML5 parsing rule in action: there may be +This result is _almost_ correct, but we're seeing another HTML5 parsing rule in action: there may be _intermediate parent tags_ that the HTML5 spec requires to be inserted by the parser. In this case, the `<td>` tag must be wrapped in `<tbody><tr>` tags. @@ -54,7 +56,7 @@ Nokogiri::HTML5::DocumentFragment.new( # => "<td>foo</td>" ``` -Hurrah! This is precisely what Nokogiri::HTML5::Inference.parse does: +Huzzah! That works. 
And it's precisely what Nokogiri::HTML5::Inference.parse does: ``` ruby Nokogiri::HTML5::Inference.parse("<td>foo</td>").to_html diff --git a/lib/nokogiri/html5/inference.rb b/lib/nokogiri/html5/inference.rb index 1412c50..e501e01 100644 --- a/lib/nokogiri/html5/inference.rb +++ b/lib/nokogiri/html5/inference.rb @@ -12,58 +12,59 @@ module Nokogiri module HTML5 # :markup: markdown # - # The [HTML5 Spec](https://html.spec.whatwg.org/multipage/parsing.html) defines some very precise - # context-dependent parsing rules which can make it challenging to "just parse" a fragment of HTML - # without knowing the parent node -- also called the "context node" -- in which it will be inserted. + # The [HTML5 Spec](https://html.spec.whatwg.org/multipage/parsing.html) defines some very precise + # context-dependent parsing rules which can make it challenging to "just parse" a fragment of HTML + # without knowing the parent node -- also called the "context node" -- in which it will be inserted. # - # Most content in an HTML5 document can be parsed assuming the parser's mode will be in the - # ["in body" insertion mode](https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody), - # but there are some notable exceptions. Perhaps the most problematic to web developers are the - # table-related tags, which will not be parsed properly unless the parser is in the - # ["in table" insertion mode](https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable). + # Most content in an HTML5 document can be parsed assuming the parser's mode will be in the + # ["in body" insertion mode](https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody), + # but there are some notable exceptions. Perhaps the most problematic to web developers are the + # table-related tags, which will not be parsed properly unless the parser is in the + # ["in table" insertion mode](https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable). 
# - # For example: + # For example: # - # ``` ruby - # Nokogiri::HTML5::DocumentFragment.parse("<td>foo</td>").to_html - # # => "foo" - # ``` + # ``` ruby + # Nokogiri::HTML5::DocumentFragment.parse("<td>foo</td>").to_html + # # => "foo" # where did the tag go!? + # ``` # - # In the default "in body" mode, the parser will log an error, "Start tag 'td' isn't allowed here", - # and drop the tag. This fragment must be parsed "in the context" of a table in order to parse - # properly. Thankfully, libgumbo and Nokogiri allow us to do this: + # In the default "in body" mode, the parser will log an error, "Start tag 'td' isn't allowed here", + # and drop the tag. This particular fragment must be parsed "in the context" of a table in order to + # parse properly. # - # ``` ruby - # Nokogiri::HTML5::DocumentFragment.new( - # Nokogiri::HTML5::Document.new, - # "<td>foo</td>", - # "table" # this is the context node - # ).to_html - # # => "<tbody><tr><td>foo</td></tr></tbody>" - # ``` + # Thankfully, libgumbo and Nokogiri allow us to set the context node: # - # This is _almost_ correct, but we're seeing another HTML5 parsing rule in action: there may be - # _intermediate parent tags_ that the HTML5 spec requires to be inserted by the parser. In this case, - # the `<td>` tag must be wrapped in `<tbody><tr>` tags. + # ``` ruby + # Nokogiri::HTML5::DocumentFragment.new( + # Nokogiri::HTML5::Document.new, + # "<td>foo</td>", + # "table" # <--- this is the context node + # ).to_html + # # => "<tbody><tr><td>foo</td></tr></tbody>" + # ``` # - # We can narrow down the result set with an XPath query to get back only the intended tags: + # This result is _almost_ correct, but we're seeing another HTML5 parsing rule in action: there may be + # _intermediate parent tags_ that the HTML5 spec requires to be inserted by the parser. In this case, + # the `<td>` tag must be wrapped in `<tbody><tr>` tags. 
# - # ``` ruby - # Nokogiri::HTML5::DocumentFragment.new( - # Nokogiri::HTML5::Document.new, - # "<td>foo</td>", - # "table" # this is the context node - # ).xpath("tbody/tr/*").to_html - # # => "<td>foo</td>" - # ``` + # We can narrow down the result set with an XPath query to get back only the intended tags: # - # Hurrah! This is precisely what Nokogiri::HTML5::Inference.parse does: + # ``` ruby + # Nokogiri::HTML5::DocumentFragment.new( + # Nokogiri::HTML5::Document.new, + # "<td>foo</td>", + # "table" # this is the context node + # ).xpath("tbody/tr/*").to_html + # # => "<td>foo</td>" + # ``` # - # ``` ruby - # Nokogiri::HTML5::Inference.parse("<td>foo</td>").to_html - # # => "<td>foo</td>" - # ``` + # Huzzah! That works. And it's precisely what Nokogiri::HTML5::Inference.parse does: # + # ``` ruby + # Nokogiri::HTML5::Inference.parse("<td>foo</td>").to_html + # # => "<td>foo</td>" + # ``` module Inference # Tags that must be parsed in a specific HTML5 insertion mode, for which we must use a # context node.