Skip to content

Commit

Permalink
chore: reduce token spend, filter out @ mentions (#173)
Browse files Browse the repository at this point in the history
  • Loading branch information
nekomeowww committed Aug 21, 2023
1 parent adedee6 commit 623f9a5
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 57 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ jobs:
uses: golangci/[email protected]
with:
# Optional: golangci-lint command line arguments.
args: '--timeout=10m'
version: v1.54.1
args: "--timeout=10m"

unittest:
name: Unit Test
Expand All @@ -127,7 +128,7 @@ jobs:
image: postgres
# Provide the password for postgres
env:
POSTGRES_PASSWORD: '123456'
POSTGRES_PASSWORD: "123456"
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
Expand Down
36 changes: 36 additions & 0 deletions internal/models/chathistories/chat_histories.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,10 +363,43 @@ func formatFullNameAndUsername(fullName, username string) string {
return strings.ReplaceAll(fullName, "#", "")
}

// encodeMessageIDIntoVirtualMessageID rewrites, in place, the MessageID and
// RepliedToMessageID of every entry in histories to small sequential "virtual"
// IDs starting at 1, and returns a lookup table keyed by virtual ID whose
// values are the original (real) IDs.
//
// A given real ID is always assigned the same virtual ID. This keeps a reply
// pointing at its target when that target is also present in histories, and
// avoids handing out more virtual IDs than there are distinct real IDs.
// A RepliedToMessageID of 0 is treated as "not a reply" and left untouched.
func (m *Model) encodeMessageIDIntoVirtualMessageID(histories []*ent.ChatHistories) map[int64]int64 {
	nextVirtualID := int64(1)
	virtualToReal := make(map[int64]int64, len(histories))
	realToVirtual := make(map[int64]int64, len(histories))

	// assign returns the virtual ID already bound to realID, or binds the next
	// free one and records it in both directions.
	assign := func(realID int64) int64 {
		if virtualID, ok := realToVirtual[realID]; ok {
			return virtualID
		}

		virtualID := nextVirtualID
		nextVirtualID++

		realToVirtual[realID] = virtualID
		virtualToReal[virtualID] = realID

		return virtualID
	}

	for _, message := range histories {
		message.MessageID = assign(message.MessageID)

		if message.RepliedToMessageID != 0 {
			message.RepliedToMessageID = assign(message.RepliedToMessageID)
		}
	}

	return virtualToReal
}

// decodeMessageIDFromVirtualMessageID translates the IDs stored in outputs back
// through the given lookup table, mutating outputs in place. For each output,
// every discussion's KeyIDs is replaced by a newly allocated slice holding the
// looked-up values, and SinceID is replaced by its looked-up value.
//
// The table is indexed by the ID currently stored in outputs; an ID with no
// entry in the table becomes 0, the zero value of the map lookup.
func (m *Model) decodeMessageIDFromVirtualMessageID(mMessageIDToVirtualMessageID map[int64]int64, outputs []*openai.ChatHistorySummarizationOutputs) {
	for _, output := range outputs {
		for _, discussion := range output.Discussion {
			// Always allocate a fresh slice of the same length rather than
			// rewriting the existing backing array.
			decoded := make([]int64, len(discussion.KeyIDs))
			for idx, id := range discussion.KeyIDs {
				decoded[idx] = mMessageIDToVirtualMessageID[id]
			}

			discussion.KeyIDs = decoded
		}

		output.SinceID = mMessageIDToVirtualMessageID[output.SinceID]
	}
}

func (m *Model) SummarizeChatHistories(chatID int64, chatType telegram.ChatType, histories []*ent.ChatHistories) (uuid.UUID, []string, error) {
historiesLLMFriendly := make([]string, 0, len(histories))
historiesIncludedMessageIDs := make([]int64, 0)

mMessageIDToVirtualMessageID := m.encodeMessageIDIntoVirtualMessageID(histories)

for _, message := range histories {
if message.RepliedToMessageID == 0 {
historiesLLMFriendly = append(historiesLLMFriendly, fmt.Sprintf(
Expand Down Expand Up @@ -402,6 +435,9 @@ func (m *Model) SummarizeChatHistories(chatID int64, chatType telegram.ChatType,
return uuid.Nil, make([]string, 0), err
}

// reverse virtual message id to real message id
m.decodeMessageIDFromVirtualMessageID(mMessageIDToVirtualMessageID, summarizations)

ss, err := m.renderRecapTemplates(chatID, chatType, summarizations)
if err != nil {
return uuid.Nil, make([]string, 0), err
Expand Down
61 changes: 61 additions & 0 deletions internal/models/chathistories/chat_histories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/nekomeowww/insights-bot/internal/configs"
"github.com/nekomeowww/insights-bot/internal/datastore"
"github.com/nekomeowww/insights-bot/internal/lib"
"github.com/nekomeowww/insights-bot/internal/thirdparty/openai"
"github.com/nekomeowww/insights-bot/internal/thirdparty/openai/openaimock"
"github.com/nekomeowww/insights-bot/pkg/tutils"
"github.com/nekomeowww/xo"
Expand Down Expand Up @@ -233,3 +234,63 @@ func TestFindLastOneHourChatHistories(t *testing.T) {
return item.MessageID
}))
}

// TestEncodeMessageIDIntoVirtualMessageID feeds three histories (the first one
// being a reply) through the encoder and checks that the returned table maps
// the sequential IDs 1..4 to the original message and replied-to IDs.
func TestEncodeMessageIDIntoVirtualMessageID(t *testing.T) {
	firstID, secondID, thirdID := xo.RandomInt64(), xo.RandomInt64(), xo.RandomInt64()
	repliedToID := xo.RandomInt64()

	histories := []*ent.ChatHistories{
		{MessageID: firstID, RepliedToMessageID: repliedToID},
		{MessageID: secondID},
		{MessageID: thirdID},
	}

	want := map[int64]int64{
		1: firstID,
		2: repliedToID,
		3: secondID,
		4: thirdID,
	}

	got := model.encodeMessageIDIntoVirtualMessageID(histories)

	assert.Equal(t, want, got)
}

// TestDecodeMessageIDFromVirtualMessageID builds outputs that reference the
// sequential IDs 1..4 and checks that decoding replaces both SinceID and every
// KeyIDs entry with the value stored for it in the lookup table.
func TestDecodeMessageIDFromVirtualMessageID(t *testing.T) {
	firstID, secondID, thirdID := xo.RandomInt64(), xo.RandomInt64(), xo.RandomInt64()
	repliedToID := xo.RandomInt64()

	lookup := map[int64]int64{
		1: firstID,
		2: repliedToID,
		3: secondID,
		4: thirdID,
	}

	got := []*openai.ChatHistorySummarizationOutputs{
		{
			SinceID: 1,
			Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
				{KeyIDs: []int64{1, 2}},
			},
		},
		{
			SinceID: 3,
			Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
				{KeyIDs: []int64{3}},
			},
		},
		{
			SinceID: 4,
			Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
				{KeyIDs: []int64{4}},
			},
		},
	}

	want := []*openai.ChatHistorySummarizationOutputs{
		{
			SinceID: firstID,
			Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
				{KeyIDs: []int64{firstID, repliedToID}},
			},
		},
		{
			SinceID: secondID,
			Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
				{KeyIDs: []int64{secondID}},
			},
		},
		{
			SinceID: thirdID,
			Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
				{KeyIDs: []int64{thirdID}},
			},
		},
	}

	// The decoder mutates its argument in place, so got is compared after the call.
	model.decodeMessageIDFromVirtualMessageID(lookup, got)

	assert.Equal(t, want, got)
}
26 changes: 18 additions & 8 deletions internal/models/chathistories/recap.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,13 @@ func (m *Model) summarizeChatHistoriesSlice(chatID int64, s string) ([]*openai.C
return outputs, resp.Usage, nil
}

func filterOutInvalidFields(messageIDs []int64, outputs []*openai.ChatHistorySummarizationOutputs) []*openai.ChatHistorySummarizationOutputs {
for i := range outputs {
func filterOutInvalidFields(messageIDs []int64) func(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
return func(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
// limit key ids to 5
outputs[i].Participants = lo.Uniq(outputs[i].Participants)
output.Participants = lo.Uniq(output.Participants)

// filter out non-exist message ids
for _, d := range outputs[i].Discussion {
for _, d := range output.Discussion {
d.KeyIDs = lo.Filter(d.KeyIDs, func(item int64, _ int) bool {
return lo.Contains(messageIDs, item) && item != 0
})
Expand All @@ -128,12 +128,12 @@ func filterOutInvalidFields(messageIDs []int64, outputs []*openai.ChatHistorySum
}
}

outputs[i].Discussion = lo.Filter(outputs[i].Discussion, func(item *openai.ChatHistorySummarizationOutputsDiscussion, _ int) bool {
output.Discussion = lo.Filter(output.Discussion, func(item *openai.ChatHistorySummarizationOutputsDiscussion, _ int) bool {
return len(item.KeyIDs) > 0 && item.Point != ""
})
}

return outputs
return output
}
}

func filterOutInvalidOutputFilterFunc(output *openai.ChatHistorySummarizationOutputs, _ int) bool {
Expand All @@ -144,6 +144,14 @@ func filterOutInvalidOutputFilterFunc(output *openai.ChatHistorySummarizationOut
len(output.Discussion) > 0 // filter out empty discussion
}

// filterOutMention strips one leading "@" from each entry of
// output.Participants, stores the result as a newly allocated slice on output,
// and returns the same output pointer. The index argument is ignored; it is
// present so the function can be passed directly to lo.Map.
func filterOutMention(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
	participants := make([]string, len(output.Participants))
	for idx, participant := range output.Participants {
		participants[idx] = strings.TrimPrefix(participant, "@")
	}

	output.Participants = participants

	return output
}

func (m *Model) summarizeChatHistories(chatID int64, messageIDs []int64, llmFriendlyChatHistories string) ([]*openai.ChatHistorySummarizationOutputs, goopenai.Usage, error) {
chatHistoriesSlices := m.openAI.SplitContentBasedByTokenLimitations(llmFriendlyChatHistories, 15000)
chatHistoriesSummarizations := make([]*openai.ChatHistorySummarizationOutputs, 0, len(chatHistoriesSlices))
Expand All @@ -168,9 +176,11 @@ func (m *Model) summarizeChatHistories(chatID int64, messageIDs []int64, llmFrie
}

// filter out invalid fields
o = filterOutInvalidFields(messageIDs, o)
o = lo.Map(o, filterOutInvalidFields(messageIDs))
// filter out empty outputs
o = lo.Filter(o, filterOutInvalidOutputFilterFunc)
// filter out mentions
o = lo.Map(o, filterOutMention)

if len(o) == 0 {
m.logger.Error(fmt.Sprintf("no valid outputs from chat histories slice: %s, tried %d...", s, tried),
Expand Down
78 changes: 38 additions & 40 deletions internal/models/chathistories/recap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,84 +11,82 @@ import (

func TestFilterOutInvalidFields(t *testing.T) {
t.Run("UniqParticipants", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{}, []*openai.ChatHistorySummarizationOutputs{
{
Participants: []string{"User 1", "User 1"},
},
})
output := filterOutInvalidFields([]int64{})(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"User 1", "User 1"},
}, 0)

assert.Equal(t, []string{"User 1"}, outputs[0].Participants)
assert.Equal(t, []string{"User 1"}, output.Participants)
})

t.Run("FilterOutNonExistMessageIDAndZeroMessageID", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{0, 1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 5}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{0, 1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 5}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4}},
}, outputs[0].Discussion)
}, output.Discussion)
})

t.Run("UniqKeyID", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 4}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 4}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4}},
}, outputs[0].Discussion)
}, output.Discussion)
})

t.Run("LimitKeyIDsTo5", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4, 5, 6, 7, 8, 9}, []*openai.ChatHistorySummarizationOutputs{
{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4, 5, 6, 7, 8, 9})(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5}},
}, outputs[0].Discussion)
}, output.Discussion)
})

t.Run("FilterOutEmptyKeyIDsAndEmptyPointFromDiscussion", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "", KeyIDs: []int64{}},
{Point: "", KeyIDs: []int64{3, 4}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "", KeyIDs: []int64{}},
{Point: "", KeyIDs: []int64{3, 4}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
}, outputs[0].Discussion)
}, output.Discussion)
})
}

func TestFilterOutInvalidOutputFilterFunc(t *testing.T) {
assert.False(t, filterOutInvalidOutputFilterFunc(&openai.ChatHistorySummarizationOutputs{}, 0))
}

func TestFilterOutMention(t *testing.T) {
output := filterOutMention(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"@User 1", "@User 2"},
}, 0)

assert.Equal(t, []string{"User 1", "User 2"}, output.Participants)
}

func TestRecapOutputTemplateExecute(t *testing.T) { //nolint:dupl
sb := new(strings.Builder)
err := RecapOutputTemplate.Execute(sb, RecapOutputTemplateInputs{
Expand Down
14 changes: 7 additions & 7 deletions pkg/linkprev/linkprev_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ func TestPreview(t *testing.T) {
meta, err := NewClient().Preview(context.Background(), "https://twitter.com/GoogleDevEurope/status/1640667303158198272")
require.NoError(t, err)
assert.Equal(t, Meta{
Title: "Google for Developers Europe on Twitter: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
Title: "Google for Developers Europe on X: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
OpenGraph: opengraph.OpenGraph{
Title: "Google for Developers Europe on Twitter",
Title: "Google for Developers Europe on X",
Type: "article",
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png:large",
URL: "https://twitter.com/GoogleDevEurope/status/1640667303158198272",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
SiteName: "Twitter",
},
}, meta)
Expand All @@ -61,7 +61,7 @@ func TestPreview(t *testing.T) {
Title: "Google for Developers Europe (@GoogleDevEurope)",
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://goo.gle/3zaGgRi\n🎁 Trying out the Go Playground → https://goo.gle/3zaGurC\n\nRT if you are a fellow Gopher!",
SiteName: "FixTweet",
SiteName: "FixTweet - 🆕 x.com link? Try fixupx.com",
},
}, meta)
})
Expand All @@ -70,13 +70,13 @@ func TestPreview(t *testing.T) {
meta, err := NewClient().Preview(context.Background(), "https://vxtwitter.com/GoogleDevEurope/status/1640667303158198272")
require.NoError(t, err)
assert.Equal(t, Meta{
Title: "Google for Developers Europe on Twitter: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
Title: "Google for Developers Europe on X: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
OpenGraph: opengraph.OpenGraph{
Title: "Google for Developers Europe on Twitter",
Title: "Google for Developers Europe on X",
Type: "article",
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png:large",
URL: "https://twitter.com/GoogleDevEurope/status/1640667303158198272",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
SiteName: "Twitter",
},
}, meta)
Expand Down

0 comments on commit 623f9a5

Please sign in to comment.