Skip to content

Commit

Permalink
Release v0.1.1 (#176)
Browse files Browse the repository at this point in the history
  • Loading branch information
henomis committed Mar 8, 2024
1 parent e3f665f commit 32ee8b0
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 23 deletions.
28 changes: 28 additions & 0 deletions examples/llm/ollama/multimodal/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package main

import (
"context"
"fmt"

"github.com/henomis/lingoose/llm/ollama"
"github.com/henomis/lingoose/thread"
)

// main builds a one-message thread that pairs a text question with an image
// referenced by URL, submits it to an Ollama client configured for the "llava"
// model, and prints the thread afterwards. Any error from Generate aborts the
// example with a panic.
func main() {
	llm := ollama.New().WithModel("llava")

	// The question and the picture travel together as two content parts of
	// the same user message.
	question := thread.NewUserMessage().
		AddContent(thread.NewTextContent("Can you describe the image?")).
		AddContent(thread.NewImageContentFromURL("https://upload.wikimedia.org/wikipedia/commons/thumb/3/34/Anser_anser_1_%28Piotr_Kuczynski%29.jpg/1280px-Anser_anser_1_%28Piotr_Kuczynski%29.jpg"))

	conversation := thread.New().AddMessage(question)

	if err := llm.Generate(context.Background(), conversation); err != nil {
		panic(err)
	}

	// NOTE(review): presumably Generate appended the model's reply to the
	// thread, so this prints both the question and the answer — confirm.
	fmt.Println(conversation)
}
40 changes: 35 additions & 5 deletions llm/ollama/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ package ollama

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"

	"github.com/henomis/restclientgo"
)
Expand All @@ -29,7 +33,7 @@ func (r *request) Encode() (io.Reader, error) {
}

// ContentType returns the content type declared for the request, which is
// always jsonContentType ("application/json").
func (r *request) ContentType() string {
	return jsonContentType
}

type response[T any] struct {
Expand All @@ -40,6 +44,7 @@ type response[T any] struct {
Message T `json:"message"`
Done bool `json:"done"`
streamCallbackFn restclientgo.StreamCallback
RawBody []byte `json:"-"`
}

type assistantMessage struct {
Expand All @@ -55,15 +60,16 @@ func (r *response[T]) Decode(body io.Reader) error {
return json.NewDecoder(body).Decode(r)
}

func (r *response[T]) SetBody(_ io.Reader) error {
func (r *response[T]) SetBody(body io.Reader) error {
r.RawBody, _ = io.ReadAll(body)
return nil
}

// AcceptContentType returns the content type this response is willing to
// accept: the explicitly configured acceptContentType when it is non-empty,
// otherwise jsonContentType ("application/json").
func (r *response[T]) AcceptContentType() string {
	if r.acceptContentType != "" {
		return r.acceptContentType
	}
	return jsonContentType
}

func (r *response[T]) SetStatusCode(code int) error {
Expand All @@ -82,10 +88,34 @@ func (r *response[T]) StreamCallback() restclientgo.StreamCallback {
}

// message is one chat entry as serialized into the JSON request payload.
type message struct {
	// Role is always emitted, under the "role" key.
	Role string `json:"role"`
	// Content is the textual part of the entry; it is omitted from the JSON
	// when empty so that image-only entries carry no "content" key.
	Content string `json:"content,omitempty"`
	// Images holds base64-encoded image payloads; it is omitted from the JSON
	// when there are none.
	Images []string `json:"images,omitempty"`
}

// options groups model parameters that are serialized to JSON.
type options struct {
// Temperature is written under the "temperature" key. The tag has no
// omitempty, so a zero value is still emitted as 0.
Temperature float64 `json:"temperature"`
}

// getImageDataAsBase64 loads the image identified by imageURL and returns its
// raw bytes encoded with standard (padded) base64.
//
// A value starting with "http://" or "https://" is fetched with an HTTP GET;
// any other value is treated as a path on the local filesystem.
//
// An error is returned when the file cannot be read, when the HTTP request
// fails, when the server answers with a non-2xx status, or when the response
// body cannot be read. On error the returned string is empty.
func getImageDataAsBase64(imageURL string) (string, error) {
	isRemote := strings.HasPrefix(imageURL, "http://") || strings.HasPrefix(imageURL, "https://")
	if !isRemote {
		imageData, err := os.ReadFile(imageURL)
		if err != nil {
			return "", err
		}
		return base64.StdEncoding.EncodeToString(imageData), nil
	}

	//nolint:gosec
	resp, err := http.Get(imageURL)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Without this check an error page (404, 500, ...) would be encoded and
	// forwarded to the model as if it were image data.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("fetching image %q: unexpected status %s", imageURL, resp.Status)
	}

	imageData, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	return base64.StdEncoding.EncodeToString(imageData), nil
}
34 changes: 26 additions & 8 deletions llm/ollama/formatter.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package ollama

import "github.com/henomis/lingoose/thread"
import (
"github.com/henomis/lingoose/thread"
)

func (o *Ollama) buildChatCompletionRequest(t *thread.Thread) *request {
return &request{
Expand All @@ -12,19 +14,35 @@ func (o *Ollama) buildChatCompletionRequest(t *thread.Thread) *request {
}
}

//nolint:gocognit
func threadToChatMessages(t *thread.Thread) []message {
chatMessages := make([]message, len(t.Messages))
for i, m := range t.Messages {
chatMessages[i] = message{
Role: threadRoleToOllamaRole[m.Role],
}

var chatMessages []message
for _, m := range t.Messages {
switch m.Role {
case thread.RoleUser, thread.RoleSystem, thread.RoleAssistant:
for _, content := range m.Contents {
chatMessage := message{
Role: threadRoleToOllamaRole[m.Role],
}

contentData, ok := content.Data.(string)
if !ok {
continue
}

if content.Type == thread.ContentTypeText {
chatMessages[i].Content += content.Data.(string) + "\n"
chatMessage.Content = contentData
} else if content.Type == thread.ContentTypeImage {
imageData, err := getImageDataAsBase64(contentData)
if err != nil {
continue
}
chatMessage.Images = []string{imageData}
} else {
continue
}

chatMessages = append(chatMessages, chatMessage)
}
case thread.RoleTool:
continue
Expand Down
8 changes: 7 additions & 1 deletion llm/ollama/ollama.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"

"github.com/henomis/lingoose/llm/cache"
Expand All @@ -15,6 +16,7 @@ import (
// Package-level defaults and content-type strings.
const (
// defaultModel is the model name "llama2".
// NOTE(review): presumably used when the caller does not pick a model — confirm in New.
defaultModel = "llama2"
// ndjsonContentType is the MIME type for newline-delimited JSON.
ndjsonContentType = "application/x-ndjson"
// jsonContentType is the MIME type for JSON; returned by the request's
// ContentType and used as the fallback in the response's AcceptContentType.
jsonContentType = "application/json"
// defaultEndpoint is the base URL "http://localhost:11434/api".
// NOTE(review): presumably the endpoint used unless overridden — confirm in New.
defaultEndpoint = "http://localhost:11434/api"
)

Expand All @@ -32,10 +34,10 @@ type StreamCallbackFn func(string)

// Ollama is the client type for the Ollama chat backend. It bundles the model
// selection, the REST client used for requests, an optional streaming callback,
// an optional cache, and the sampling temperature.
type Ollama struct {
	// model is the name of the model to query.
	model string
	// restClient performs the HTTP calls.
	restClient *restclientgo.RestClient
	// streamCallbackFn is a callback of type StreamCallbackFn (func(string)).
	// NOTE(review): presumably invoked with streamed chunks — confirm in stream.
	streamCallbackFn StreamCallbackFn
	// cache is a pointer to a cache.Cache.
	// NOTE(review): presumably optional (nil when unset) — confirm.
	cache *cache.Cache
	// temperature is the sampling temperature.
	// NOTE(review): presumably forwarded through the request options — confirm.
	temperature float64
}

func New() *Ollama {
Expand Down Expand Up @@ -199,6 +201,10 @@ func (o *Ollama) stream(ctx context.Context, t *thread.Thread, chatRequest *requ
return fmt.Errorf("%w: %w", ErrOllamaChat, err)
}

if resp.HTTPStatusCode >= http.StatusBadRequest {
return fmt.Errorf("%w: %s", ErrOllamaChat, resp.RawBody)
}

t.AddMessage(thread.NewAssistantMessage().AddContent(
thread.NewTextContent(assistantMessage),
))
Expand Down
20 changes: 11 additions & 9 deletions textsplitter/recursiveTextSplitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,21 @@ func (r *RecursiveCharacterTextSplitter) SplitText(text string) []string {
finalChunks := []string{}
// Get appropriate separator to use
separator := r.separators[len(r.separators)-1]
for _, s := range r.separators {
newSeparators := []string{}
for i, s := range r.separators {
if s == "" {
separator = s
break
}

if strings.Contains(text, s) {
separator = s
newSeparators = r.separators[i+1:]
break
}
}
// Now that we have the separator, split the text
var splits []string
if separator != "" {
splits = strings.Split(text, separator)
} else {
splits = strings.Split(text, "")
}
splits := strings.Split(text, separator)
// Now go merging things, recursively splitting longer texts.
goodSplits := []string{}
for _, s := range splits {
Expand All @@ -96,8 +94,12 @@ func (r *RecursiveCharacterTextSplitter) SplitText(text string) []string {
finalChunks = append(finalChunks, mergedText...)
goodSplits = []string{}
}
otherInfo := r.SplitText(s)
finalChunks = append(finalChunks, otherInfo...)
if len(newSeparators) == 0 {
finalChunks = append(finalChunks, s)
} else {
otherInfo := r.SplitText(s)
finalChunks = append(finalChunks, otherInfo...)
}
}
}
if len(goodSplits) > 0 {
Expand Down
62 changes: 62 additions & 0 deletions textsplitter/recursiveTextSplitter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package textsplitter
import (
"reflect"
"testing"
"unicode/utf8"

"github.com/henomis/lingoose/document"
"github.com/henomis/lingoose/types"
)

//nolint:dupword,funlen
func TestRecursiveCharacterTextSplitter_SplitDocuments(t *testing.T) {
type fields struct {
textSplitter TextSplitter
Expand Down Expand Up @@ -53,6 +55,66 @@ func TestRecursiveCharacterTextSplitter_SplitDocuments(t *testing.T) {
},
},
},
{
name: "TestRecursiveCharacterTextSplitter_SplitDocuments",
fields: fields{
textSplitter: TextSplitter{
chunkSize: 20,
chunkOverlap: 1,
lengthFunction: func(s string) int {
return len(s)
},
},
separators: []string{"\n", "$"},
},
args: args{
documents: []document.Document{
{
Content: "Hi, Harrison. \nI am glad to meet you",
Metadata: types.Meta{},
},
},
},
want: []document.Document{
{
Content: "Hi, Harrison.",
Metadata: types.Meta{},
},
{
Content: "I am glad to meet you",
Metadata: types.Meta{},
},
},
},
{
name: "TestRecursiveCharacterTextSplitter_SplitDocuments",
fields: fields{
textSplitter: TextSplitter{
chunkSize: 10,
chunkOverlap: 0,
lengthFunction: utf8.RuneCountInString,
},
separators: []string{"\n\n", "\n", " "},
},
args: args{
documents: []document.Document{
{
Content: "哈里森\n很高兴遇见你\n欢迎来中国",
Metadata: types.Meta{},
},
},
},
want: []document.Document{
{
Content: "哈里森\n很高兴遇见你",
Metadata: types.Meta{},
},
{
Content: "欢迎来中国",
Metadata: types.Meta{},
},
},
},
{
name: "TestRecursiveCharacterTextSplitter_SplitDocuments",
fields: fields{
Expand Down

0 comments on commit 32ee8b0

Please sign in to comment.