Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix jsoup's inconsistent handling of duplicate attributes (issue #1719) #1750

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,14 @@

<dependencies>

<!-- AssertJ fluent assertions; declared with test scope, so it is not a dependency of the main artifact -->
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<!-- use 2.8.0 for Java 7 projects -->
<version>3.8.0</version>
<scope>test</scope>
</dependency>

<!-- junit -->
<dependency>
<groupId>org.junit.jupiter</groupId>
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -316,11 +316,25 @@ boolean process(Token t, HtmlTreeBuilder tb) {
return true;
}

/**
 * Handles a start tag token while the tree builder is in the "in body" state,
 * dispatching on the tag's normalized name.
 *
 * @param t the token to process; it is read as a start tag via {@code asStartTag()}
 * @param tb the HtmlTreeBuilder that constructs the node tree
 */

private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) {
final Token.StartTag startTag = t.asStartTag();
final String name = startTag.normalName();
final ArrayList<Element> stack;
Element el;
// cleanup duplicate attributes:
if (startTag.hasAttributes() && !startTag.attributes.isEmpty()) {
int dupes = startTag.attributes.deduplicate(tb.settings);
if (dupes > 0) {
tb.error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
}
}

switch (name) {
case "a":
Expand Down
113 changes: 113 additions & 0 deletions src/test/java/org/jsoup/parser/DuplicateAttributes_Issue1719.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package org.jsoup.parser;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Regression tests for issue #1719: a tag that declares the same attribute more than once
 * must be serialized with only one occurrence of that attribute.
 *
 * <p>Every expected value in this class keeps the <em>first</em> occurrence
 * ({@code value="foo"}, {@code class="hello"}) and drops the later duplicate. The same input is
 * run through the XML parser, the HTML parser, and {@link Jsoup#parse(String)}, with both HTML
 * and XML output syntax.
 */
public class DuplicateAttributes_Issue1719 {
    /** Maximum number of parse errors each explicitly constructed parser is asked to track. */
    private static final int MAX_TRACKED_ERRORS = 10;

    // Document scaffolding expected around the <img> tag when a bare fragment is parsed by
    // Jsoup.parse and pretty-printed.
    // NOTE(review): the whitespace in HEAD/TRAIL must match the serializer's indentation
    // exactly - confirm against the actual output if these literals were ever reformatted.
    private static final String HEAD = "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            " ";
    private static final String TRAIL = "\n" +
            " </body>\n" +
            "</html>";

    /** Expected {@code <img>} serialization in XML syntax (self-closing, duplicate dropped). */
    public static final String DESIRED_XML_IMG_TAG = "<img src=\"file.png\" name=\"test\" value=\"foo\" type=\"hidden\" />";
    /** Expected {@code <img>} serialization in HTML syntax (duplicate dropped). */
    public static final String DESIRED_HTML_IMG_TAG = "<img src=\"file.png\" name=\"test\" value=\"foo\" type=\"hidden\">";

    /** Self-closing {@code <img>} that declares {@code value} twice. */
    public static final String IMG_INPUT = "<img src=\"file.png\" name=\"test\" value=\"foo\" type=\"hidden\" value=\"bar\" />";

    /** Same as {@link #IMG_INPUT} but without the trailing slash. */
    public static final String IMG_INPUT_NO_SLASH = "<img src=\"file.png\" name=\"test\" value=\"foo\" type=\"hidden\" value=\"bar\">";

    /** Full document whose {@code <area>} tag declares {@code class} twice. */
    public static final String AREA_INPUT = "<html>\n" +
            " <body>\n" +
            " <a href='#1'>\n" +
            " <div>\n" +
            " </div>\n" +
            " </a>\n" +
            "<area href='#2' class = 'hello' class = world'>\n" +
            " </body>\n" +
            "</html>";

    /** Expected {@code <area>} serialization in HTML syntax (duplicate dropped). */
    public static final String DESIRED_HTML_AREA_TAG = "<area href=\"#2\" class=\"hello\">";

    /**
     * Parses {@code markup} with the supplied parser after enabling error tracking on it.
     *
     * @param parser the parser to use; error tracking is switched on before parsing
     * @param markup the input to parse
     * @return the parsed document, using an empty base URI
     */
    private static Document parseTracked(Parser parser, String markup) {
        return parser.setTrackErrors(MAX_TRACKED_ERRORS).parseInput(markup, "");
    }

    /**
     * Parses {@link #IMG_INPUT} with {@link Jsoup#parse(String)} and switches the resulting
     * document's output syntax to XML.
     *
     * @return the parsed document, configured for XML output
     */
    private static Document parseImgForXmlOutput() {
        final Document document = Jsoup.parse(IMG_INPUT);
        document.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
        return document;
    }

    /**
     * Replaces the named {@code &nbsp;} entity with its numeric form, since nbsp does not
     * exist in xhtml.
     *
     * @param html serialized markup
     * @return the markup with every {@code &nbsp;} replaced by {@code &#160;}
     */
    private static String toXhtmlEntities(String html) {
        // Literal replacement: no regular expression is needed here.
        return html.replace("&nbsp;", "&#160;");
    }

    @Test
    void parserXML() {
        Document doc = parseTracked(Parser.xmlParser(), IMG_INPUT);

        assertThat(doc.selectFirst("img").outerHtml()).isNotBlank().isEqualTo(DESIRED_XML_IMG_TAG);
    }

    @Test
    void parserHTML_IMAGE() {
        Document doc = parseTracked(Parser.htmlParser(), IMG_INPUT);

        assertThat(doc.selectFirst("img").outerHtml()).isNotBlank().isEqualTo(DESIRED_HTML_IMG_TAG);
    }

    @Test
    void parserHTML_AREA() {
        Document doc = parseTracked(Parser.htmlParser(), AREA_INPUT);

        assertThat(doc.selectFirst("area").outerHtml()).isNotBlank().isEqualTo(DESIRED_HTML_AREA_TAG);
    }

    @Test
    void parserXML_toXML() {
        Document doc = parseTracked(Parser.xmlParser(), IMG_INPUT);
        doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);

        assertThat(doc.selectFirst("img").outerHtml()).isNotBlank().isEqualTo(DESIRED_XML_IMG_TAG);
    }

    @Test
    void parserHTML_toXML() {
        Document doc = parseTracked(Parser.htmlParser(), IMG_INPUT);
        doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);

        assertThat(doc.selectFirst("img").outerHtml()).isNotBlank().isEqualTo(DESIRED_XML_IMG_TAG);
    }

    @Test
    void jsoupParseToXML() {
        String outputXhtml = toXhtmlEntities(parseImgForXmlOutput().html());

        assertThat(outputXhtml).isNotBlank().isEqualTo(HEAD + DESIRED_XML_IMG_TAG + TRAIL);
    }

    @Test
    void jsoupParseToXML_outerMethod() {
        String outputXhtml = toXhtmlEntities(parseImgForXmlOutput().outerHtml());

        assertThat(outputXhtml).isNotBlank().isEqualTo(HEAD + DESIRED_XML_IMG_TAG + TRAIL);
    }
}