Skip to content

Commit

Permalink
Avoid buffering in archive detection if the stream is seekable
Browse files Browse the repository at this point in the history
  • Loading branch information
cmaglie committed May 25, 2023
1 parent 4518746 commit 5c29552
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
8 changes: 3 additions & 5 deletions extract_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ func TestTarGzMemoryConsumption(t *testing.T) {
runtime.GC()
runtime.ReadMemStats(&m)

err = extract.Gz(context.Background(), f, tmpDir.String(), nil)
err = extract.Archive(context.Background(), f, tmpDir.String(), nil)
require.NoError(t, err)

runtime.ReadMemStats(&m2)
Expand Down Expand Up @@ -397,7 +397,7 @@ func TestZipMemoryConsumption(t *testing.T) {
runtime.GC()
runtime.ReadMemStats(&m)

err = extract.Zip(context.Background(), f, tmpDir.String(), nil)
err = extract.Archive(context.Background(), f, tmpDir.String(), nil)
require.NoError(t, err)

runtime.ReadMemStats(&m2)
Expand All @@ -406,9 +406,7 @@ func TestZipMemoryConsumption(t *testing.T) {
heapUsed = 0
}
fmt.Println("Heap memory used during the test:", heapUsed)
// A .zip file requires random access, so the full io.Reader content must be cached; since
// the test file is 130MB, that is the reason for the high memory consumption.
require.True(t, heapUsed < 250000000, "heap consumption should be less than 250M but is %d", heapUsed)
require.True(t, heapUsed < 5000000, "heap consumption should be less than 5M but is %d", heapUsed)
}

func download(t require.TestingT, url string, file *paths.Path) error {
Expand Down
10 changes: 9 additions & 1 deletion extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,15 @@ func match(r io.Reader) (io.Reader, types.Type, error) {
return nil, types.Unknown, err
}

r = io.MultiReader(bytes.NewBuffer(buffer[:n]), r)
if seeker, ok := r.(io.Seeker); ok {
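// If the stream is seekable, we just rewind it instead of buffering.
// NOTE(review): this seeks to absolute offset 0, which assumes the reader was positioned
// at the start of the stream when match was called — confirm against callers.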
if _, err := seeker.Seek(0, io.SeekStart); err != nil {
return nil, types.Unknown, err
}
} else {
// otherwise we create a new reader that will prepend the buffer
r = io.MultiReader(bytes.NewBuffer(buffer[:n]), r)
}

typ, err := filetype.Match(buffer)

Expand Down

0 comments on commit 5c29552

Please sign in to comment.