Skip to content

Commit

Permalink
mmcdole#151: Allow additional parsers for feed formats. Currently onl…
Browse files Browse the repository at this point in the history
…y atom is allowed as part of RSS
  • Loading branch information
Necoro committed Jan 3, 2024
1 parent 068281a commit 3c6e1eb
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 101 deletions.
205 changes: 112 additions & 93 deletions atom/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ var (
"uri": true,
"url": true, // atom 0.3
}

// Atom currently has no dedicated extension parsers, so an empty set is used.
emptyExtParsers = make(shared.ExtParsers)
)

// Parser is an Atom Parser
Expand All @@ -38,6 +41,14 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
return ap.parseRoot(p)
}

// ParseAsExtension parses the element the pull parser is currently
// positioned on as a piece of Atom entry content. The parsed value is
// stored in a fresh Entry, which is returned as a *Entry. If parsing
// fails, the error is returned together with a nil result.
func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) {
	var entry Entry
	err := ap.parseEntryContent(p, &entry)
	if err != nil {
		return nil, err
	}
	return &entry, nil
}

func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
if err := p.Expect(xpp.StartTag, "feed"); err != nil {
return nil, err
Expand Down Expand Up @@ -69,7 +80,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -215,103 +226,14 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
}

if tok == xpp.StartTag {

name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
extensions = e
} else if name == "title" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Title = result
} else if name == "id" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.ID = result
} else if name == "rights" ||
name == "copyright" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Rights = result
} else if name == "summary" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Summary = result
} else if name == "source" {
result, err := ap.parseSource(p)
if err != nil {
return nil, err
}
entry.Source = result
} else if name == "updated" ||
name == "modified" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Updated = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.UpdatedParsed = &utcDate
}
} else if name == "contributor" {
result, err := ap.parsePerson("contributor", p)
if err != nil {
return nil, err
}
entry.Contributors = append(entry.Contributors, result)
} else if name == "author" {
result, err := ap.parsePerson("author", p)
if err != nil {
return nil, err
}
entry.Authors = append(entry.Authors, result)
} else if name == "category" {
result, err := ap.parseCategory(p)
if err != nil {
return nil, err
}
entry.Categories = append(entry.Categories, result)
} else if name == "link" {
result, err := ap.parseLink(p)
if err != nil {
return nil, err
}
entry.Links = append(entry.Links, result)
} else if name == "published" ||
name == "issued" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Published = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.PublishedParsed = &utcDate
}
} else if name == "content" {
result, err := ap.parseContent(p)
if err != nil {
return nil, err
}
entry.Content = result
} else {
err := p.Skip()
if err != nil {
if err := ap.parseEntryContent(p, entry); err != nil {
return nil, err
}
}
Expand All @@ -329,6 +251,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
return entry, nil
}

// parseEntryContent parses the element the pull parser is currently
// positioned on and stores the result in the matching field of entry.
// The element is selected by its lower-cased name; an element whose name
// is not handled here is skipped. Errors from the element parsers and
// from skipping are returned unchanged.
func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error {
	switch strings.ToLower(p.Name) {
	case "title":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Title = text

	case "id":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.ID = text

	case "rights", "copyright":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Rights = text

	case "summary":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Summary = text

	case "source":
		src, err := ap.parseSource(p)
		if err != nil {
			return err
		}
		entry.Source = src

	case "updated", "modified":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Updated = text
		// A date that cannot be parsed is not an error: the raw text is
		// kept and UpdatedParsed is left untouched.
		if parsed, dateErr := shared.ParseDate(text); dateErr == nil {
			utc := parsed.UTC()
			entry.UpdatedParsed = &utc
		}

	case "contributor":
		person, err := ap.parsePerson("contributor", p)
		if err != nil {
			return err
		}
		entry.Contributors = append(entry.Contributors, person)

	case "author":
		person, err := ap.parsePerson("author", p)
		if err != nil {
			return err
		}
		entry.Authors = append(entry.Authors, person)

	case "category":
		cat, err := ap.parseCategory(p)
		if err != nil {
			return err
		}
		entry.Categories = append(entry.Categories, cat)

	case "link":
		link, err := ap.parseLink(p)
		if err != nil {
			return err
		}
		entry.Links = append(entry.Links, link)

	case "published", "issued":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Published = text
		// Same policy as for "updated": keep the raw text even when the
		// date cannot be parsed.
		if parsed, dateErr := shared.ParseDate(text); dateErr == nil {
			utc := parsed.UTC()
			entry.PublishedParsed = &utc
		}

	case "content":
		content, err := ap.parseContent(p)
		if err != nil {
			return err
		}
		entry.Content = content

	default:
		// Unhandled element: skip it.
		return p.Skip()
	}

	return nil
}

func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {

if err := p.Expect(xpp.StartTag, "source"); err != nil {
Expand Down Expand Up @@ -358,7 +377,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down
1 change: 1 addition & 0 deletions extensions/extensions.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ type Extension struct {
Value string `json:"value"`
Attrs map[string]string `json:"attrs"`
Children map[string][]Extension `json:"children"`
Parsed interface{} `json:"parsed,omitempty"`
}

func parseTextExtension(name string, extensions map[string][]Extension) (value string) {
Expand Down
33 changes: 31 additions & 2 deletions internal/shared/extparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ import (
"github.com/mmcdole/goxpp"
)

// ExtParser is implemented by parsers that can parse the XML element the
// pull parser is currently positioned on as an extension element.
type ExtParser interface {
// ParseAsExtension parses the current element of p and returns the
// parsed value, or an error if parsing fails.
ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error)
}

// ExtParsers maps a namespace prefix to the ExtParser that ParseExtension
// uses for elements with that prefix.
type ExtParsers map[string]ExtParser

// IsExtension returns whether or not the current
// XML element is an extension element (if it has a
// non empty prefix)
Expand All @@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool {
// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) {
prefix := prefixForNamespace(p.Space, p)

result, err := parseExtensionElement(p)
var result ext.Extension
var err error
if extParser, ok := extParsers[prefix]; ok {
result, err = parseExtensionFromParser(p, extParser)
} else {
result, err = parseExtensionElement(p)
}
if err != nil {
return nil, err
}
Expand All @@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er
return fe, nil
}

// parseExtensionFromParser parses the current element with extParser
// instead of the generic extension element parser. The pull parser must
// be positioned on a start tag. The element name is recorded in e.Name
// and the value returned by extParser in e.Parsed. After extParser
// returns, the parser must be positioned on the end tag with that name.
func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) {
	err = p.Expect(xpp.StartTag, "*")
	if err != nil {
		return e, err
	}

	e.Name = p.Name

	e.Parsed, err = extParser.ParseAsExtension(p)
	if err != nil {
		return e, err
	}

	// The delegated parser has to leave p on the closing tag of the element.
	err = p.Expect(xpp.EndTag, e.Name)
	return e, err
}

func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
if err = p.Expect(xpp.StartTag, "*"); err != nil {
return e, err
Expand Down
14 changes: 13 additions & 1 deletion parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/mmcdole/gofeed/atom"
"github.com/mmcdole/gofeed/json"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/gofeed/rss"
)

Expand Down Expand Up @@ -155,8 +156,19 @@ func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) {
return f.atomTrans().Translate(af)
}

// BuildRSSExtParsers returns the extension parsers that are passed to the
// RSS parser. The Atom parser of f is registered under each of the
// prefixes "atom", "atom10" and "atom03".
func (f *Parser) BuildRSSExtParsers() shared.ExtParsers {
	atomParser := f.ap

	// all possible atom variants
	return shared.ExtParsers{
		"atom":   atomParser,
		"atom10": atomParser,
		"atom03": atomParser,
	}
}

func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) {
rf, err := f.rp.Parse(feed)
rf, err := f.rp.Parse(feed, f.BuildRSSExtParsers())
if err != nil {
return nil, err
}
Expand Down
12 changes: 8 additions & 4 deletions rss/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@ import (
)

// Parser is a RSS Parser
type Parser struct{}
type Parser struct {
extParsers shared.ExtParsers
}

// Parse parses an xml feed into an rss.Feed
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
func (rp *Parser) Parse(feed io.Reader, extParsers shared.ExtParsers) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
rp.extParsers = extParsers

_, err := shared.FindRoot(p)
if err != nil {
Expand Down Expand Up @@ -141,7 +144,8 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
ext, err := shared.ParseExtension(extensions, p)

ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -338,7 +342,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
ext, err := shared.ParseExtension(extensions, p)
ext, err := shared.ParseExtension(extensions, p, rp.extParsers)
if err != nil {
return nil, err
}
Expand Down
3 changes: 2 additions & 1 deletion rss/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"strings"
"testing"

"github.com/mmcdole/gofeed"
"github.com/mmcdole/gofeed/rss"
"github.com/stretchr/testify/assert"
)
Expand All @@ -27,7 +28,7 @@ func TestParser_Parse(t *testing.T) {

// Parse actual feed
fp := &rss.Parser{}
actual, _ := fp.Parse(bytes.NewReader(f))
actual, _ := fp.Parse(bytes.NewReader(f), gofeed.NewParser().BuildRSSExtParsers())

// Get json encoded expected feed result
ef := fmt.Sprintf("../testdata/parser/rss/%s.json", name)
Expand Down

0 comments on commit 3c6e1eb

Please sign in to comment.