plumbing: format/packfile, prevent large objects from being read into memory completely by zeripath · Pull Request #303 · go-git/go-git · GitHub
[go: up one dir, main page]

Skip to content

plumbing: format/packfile, prevent large objects from being read into memory completely #303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 12, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

</div>
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Prevent large objects from being read into memory completely
This PR adds code to prevent large objects from being read into memory from packfiles or the filesystem.

Objects larger than 1 MiB are no longer stored directly in the cache
or read completely into memory.

Signed-off-by: Andrew Thornton <art27@cantab.net>
  • Loading branch information
zeripath committed Apr 18, 2021
commit 77d44a359b801fb9880641fc4b9b1ff76581dc06
77 changes: 77 additions & 0 deletions plumbing/format/objfile/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@ package objfile
import (
"compress/zlib"
"errors"
"fmt"
"io"
"os"
"strconv"

"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/format/packfile"
"github.com/go-git/go-git/v5/utils/ioutil"

"github.com/go-git/go-billy/v5"
)

var (
Expand Down Expand Up @@ -112,3 +117,75 @@ func (r *Reader) Hash() plumbing.Hash {
func (r *Reader) Close() error {
	// Only the zlib stream held in r.zlib is closed here; whatever error it
	// reports is handed back to the caller unchanged.
	closeErr := r.zlib.Close()
	return closeErr
}

// Compile-time check that *EncodedObject implements plumbing.EncodedObject.
var _ plumbing.EncodedObject = (*EncodedObject)(nil)

// BillyFileObjectGetter is the source EncodedObject uses to open the file
// backing an object, looked up by the object's hash.
type BillyFileObjectGetter interface {
// Object returns an open billy.File for the given hash, or an error.
// EncodedObject.Reader maps errors satisfying os.IsNotExist to
// plumbing.ErrObjectNotFound.
Object(plumbing.Hash) (billy.File, error)
}

// EncodedObject is a plumbing.EncodedObject that stores only the object's
// hash, type and size. Its content is not held in the struct: Reader opens
// the object through dir each time it is called.
type EncodedObject struct {
// dir opens the file for the object identified by h.
dir BillyFileObjectGetter
// h is the object hash returned by Hash and passed to dir.Object.
h plumbing.Hash
// t is the object type returned by Type; Reader checks it against the
// header read from the file.
t plumbing.ObjectType
// sz is the object size returned by Size; Reader checks it against the
// header read from the file.
sz int64
}

// Hash returns the hash this object was created with.
func (e *EncodedObject) Hash() plumbing.Hash {
	id := e.h
	return id
}

// Reader opens the object's file through e.dir and returns a stream of its
// content, positioned after the object header.
//
// The header read from the file must match the type and size recorded in e;
// otherwise ErrHeader is returned. If the file does not exist,
// plumbing.ErrObjectNotFound is returned.
//
// Closing the returned io.ReadCloser closes both the objfile reader and the
// underlying file. On every error path the file is closed before returning,
// so a failed call does not leak the file handle.
func (e *EncodedObject) Reader() (io.ReadCloser, error) {
	f, err := e.dir.Object(e.h)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, plumbing.ErrObjectNotFound
		}

		return nil, err
	}

	r, err := NewReader(f)
	if err != nil {
		// No reader was created, so the file is the only thing to release.
		_ = f.Close()
		return nil, err
	}

	// fail releases everything acquired so far. r.Close only closes the
	// zlib stream, so the file has to be closed explicitly as well.
	fail := func(cause error) (io.ReadCloser, error) {
		_ = r.Close()
		_ = f.Close()
		return nil, cause
	}

	t, size, err := r.Header()
	if err != nil {
		return fail(err)
	}
	if t != e.t || size != e.sz {
		return fail(ErrHeader)
	}

	return ioutil.NewReadCloserWithCloser(r, f.Close), nil
}

// SetType does nothing: the argument is ignored and the stored type is left
// unchanged.
func (e *EncodedObject) SetType(_ plumbing.ObjectType) {
}

// Type returns the object type this object was created with.
func (e *EncodedObject) Type() plumbing.ObjectType {
	kind := e.t
	return kind
}

// Size returns the size this object was created with.
func (e *EncodedObject) Size() int64 {
	n := e.sz
	return n
}

// SetSize does nothing: the argument is ignored and the stored size is left
// unchanged.
func (e *EncodedObject) SetSize(_ int64) {
}

// Writer always fails: it returns a nil writer together with a
// "Not supported" error.
func (e *EncodedObject) Writer() (io.WriteCloser, error) {
	var none io.WriteCloser
	unsupported := fmt.Errorf("Not supported")
	return none, unsupported
}

// NewEncodedObject returns an EncodedObject that records the given hash,
// type and size and uses dir to open the object's file when read.
func NewEncodedObject(dir BillyFileObjectGetter, h plumbing.Hash, t plumbing.ObjectType, size int64) *EncodedObject {
	obj := new(EncodedObject)
	obj.dir = dir
	obj.h = h
	obj.t = t
	obj.sz = size
	return obj
}
15 changes: 15 additions & 0 deletions plumbing/format/packfile/fsobject.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/plumbing/format/idxfile"
"github.com/go-git/go-git/v5/utils/ioutil"
)

// FSObject is an object from the packfile on the filesystem.
Expand Down Expand Up @@ -63,6 +64,20 @@ func (o *FSObject) Reader() (io.ReadCloser, error) {
}

p := NewPackfileWithCache(o.index, nil, f, o.cache)
if o.size > LargeObjectThreshold {
// We have a big object
h, err := p.objectHeaderAtOffset(o.offset)
if err != nil {
return nil, err
}

r, err := p.getReaderDirect(h)
if err != nil {
_ = f.Close()
return nil, err
}
return ioutil.NewReadCloserWithCloser(r, f.Close), nil
}
r, err := p.getObjectContent(o.offset)
if err != nil {
_ = f.Close()
Expand Down
73 changes: 73 additions & 0 deletions plumbing/format/packfile/packfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ var (
// wrapped in FSObject.
const smallObjectThreshold = 16 * 1024

// LargeObjectThreshold is the size in bytes (1 MiB) above which an object
// is considered too large to be reasonably cached and kept in memory.
// Objects larger than this threshold are never read completely into memory
// in order to be stored in the cache.
const LargeObjectThreshold = 1024 * 1024

// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
idxfile.Index
Expand Down Expand Up @@ -282,6 +288,37 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
return obj.Reader()
}

// getReaderDirect returns a reader for the object described by h, read via
// the packfile scanner p.s.
//
// Commit, tree, blob and tag objects are returned straight from the scanner.
// For REF and OFS delta objects the delta stream is read from the scanner and
// handed to the matching resolver. If the resolver fails, the delta stream is
// closed before the error is returned. Any other type yields
// ErrInvalidObject.
func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) {
	// Choose how a delta is turned into content; non-delta types return early.
	var resolve func(*ObjectHeader, io.ReadCloser) (io.ReadCloser, error)

	switch h.Type {
	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
		return p.s.ReadObject()
	case plumbing.REFDeltaObject:
		resolve = p.readREFDeltaObjectContent
	case plumbing.OFSDeltaObject:
		resolve = p.readOFSDeltaObjectContent
	default:
		return nil, ErrInvalidObject.AddDetails("type %q", h.Type)
	}

	delta, err := p.s.ReadObject()
	if err != nil {
		return nil, err
	}

	content, err := resolve(h, delta)
	if err != nil {
		_ = delta.Close()
		return nil, err
	}

	return content, nil
}

func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
var obj = new(plumbing.MemoryObject)
obj.SetSize(h.Length)
Expand Down Expand Up @@ -334,6 +371,20 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
}

// readREFDeltaObjectContent resolves the base object named by h.Reference,
// trying the cache first and falling back to p.Get, then returns the reader
// produced by ReaderFromDelta for that base and the delta stream deltaRC.
func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) {
	if cached, hit := p.cacheGet(h.Reference); hit {
		return ReaderFromDelta(h, cached, deltaRC)
	}

	fetched, err := p.Get(h.Reference)
	if err != nil {
		return nil, err
	}

	return ReaderFromDelta(h, fetched, deltaRC)
}

func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
var err error

Expand Down Expand Up @@ -364,6 +415,28 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset
return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
}

// readOFSDeltaObjectContent resolves the base object of an offset delta and
// returns the reader produced by ReaderFromDelta for that base and the delta
// stream deltaRC.
//
// The base is located only through h.OffsetReference: its hash is looked up
// with FindHash and the object is loaded with objectAtOffset. The base must
// not be looked up again by h.Reference afterwards, because that would
// discard the object just resolved by offset and replace it with whatever
// h.Reference names, or fail when nothing is found under that hash.
//
// The caller remains responsible for closing deltaRC when an error is
// returned.
func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) {
	hash, err := p.FindHash(h.OffsetReference)
	if err != nil {
		return nil, err
	}

	base, err := p.objectAtOffset(h.OffsetReference, hash)
	if err != nil {
		return nil, err
	}

	return ReaderFromDelta(h, base, deltaRC)
}

func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
hash, err := p.FindHash(offset)
if err != nil {
Expand Down
Loading
0