-
Notifications
You must be signed in to change notification settings - Fork 1
/
postprocess_test.go
77 lines (68 loc) · 1.77 KB
/
postprocess_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
package youcrawl
import (
"fmt"
"os"
"sync"
"testing"
)
// PrintGlobalStorePostProcess is a stateless post-process used by the tests in
// this file. Its Process method reads the "items" entry of the global store and
// prints how many items it holds.
type PrintGlobalStorePostProcess struct{}
// Process prints "total crawl N items" to standard output, where N is the
// length of the slice stored under the "items" key of store.
//
// When the key holds no value (GetValue returns nil) nothing is printed and
// nil is returned. When the value is present but is not a []interface{}, an
// error describing the actual type is returned instead of panicking on the
// type assertion.
func (p *PrintGlobalStorePostProcess) Process(store GlobalStore) error {
	rawItems := store.GetValue("items")
	if rawItems == nil {
		return nil
	}

	// Use the comma-ok form: other code may store a differently typed slice
	// (for example []map[string]interface{}) under the same key.
	items, ok := rawItems.([]interface{})
	if !ok {
		return fmt.Errorf("print global store: items has unexpected type %T, want []interface{}", rawItems)
	}

	fmt.Printf("total crawl %d items\n", len(items))
	return nil
}
// TestPostProcess builds an engine with MaxRequest set to 2, queues the same
// example.com URL three times, and registers an HTML parser that stores the
// page <title> text on the item under the "title" key. It then installs the
// user-agent middleware, the global store pipeline and the
// PrintGlobalStorePostProcess, runs the engine and waits for it to finish.
//
// The test makes no assertions of its own; it only exercises the run path.
// NOTE(review): this appears to issue real HTTP requests to example.com —
// confirm network access is available where the test runs.
func TestPostProcess(t *testing.T) {
	engine := NewEngine(&EngineOption{MaxRequest: 2})
	engine.AddURLs(
		"https://example.com",
		"https://example.com",
		"https://example.com",
	)

	engine.AddHTMLParser(func(ctx *Context) error {
		item := ctx.Item.(DefaultItem)
		item.SetValue("title", ctx.Doc.Find("title").Text())
		return nil
	})

	engine.UseMiddleware(&UserAgentMiddleware{})
	engine.AddPipelines(&GlobalStorePipeline{})
	engine.AddPostProcess(&PrintGlobalStorePostProcess{})

	// The wait group is handed to Run and waited on here, as in the
	// original flow: Add(1) before Run, Wait afterwards.
	done := &sync.WaitGroup{}
	done.Add(1)
	engine.Run(done)
	done.Wait()
}
// TestOutputCSVPostProcess_Process fills a MemoryGlobalStore with ten items
// and runs the CSV output post-process over it, reporting any returned error.
//
// Every item carries "title", "content" and "ignore"; only even-indexed items
// also carry "exist". The post-process is configured to write "title",
// "content" and "exist" with a header row, mapping "title" to "webTitle" and
// "exist" to "webExist", and using "Undefined" as the NotExistValue. The
// output file is removed when the test returns.
func TestOutputCSVPostProcess_Process(t *testing.T) {
	const outputPath = "./output.csv"

	rows := make([]map[string]interface{}, 0)
	for i := 0; i < 10; i++ {
		row := map[string]interface{}{
			"title":   fmt.Sprintf("title %d", i),
			"content": fmt.Sprintf("content %d", i),
			"ignore":  fmt.Sprintf("ignore %d", i),
		}
		if i%2 == 0 {
			row["exist"] = true
		}
		rows = append(rows, row)
	}

	store := &MemoryGlobalStore{Content: map[string]interface{}{}}
	store.SetValue("items", rows)

	option := OutputCSVPostProcessOption{
		OutputPath:    outputPath,
		WithHeader:    true,
		Keys:          []string{"title", "content", "exist"},
		KeysMapping:   map[string]string{"title": "webTitle", "exist": "webExist"},
		NotExistValue: "Undefined",
	}
	csvProcess := NewOutputCSVPostProcess(option)

	// Best-effort cleanup of the generated file; the removal error is ignored.
	defer os.Remove(outputPath)

	if err := csvProcess.Process(store); err != nil {
		t.Error(err)
	}
}