refactor: filecollector into new package (#2174)

* refactor: filecollector into new package

* Add test for symlinks

* add test fix bug of GetContainerArchive

* add test data
This commit is contained in:
ChristopherHX
2024-01-30 01:46:45 +01:00
committed by GitHub
parent 4ca35d2192
commit 5a80a044f9
6 changed files with 160 additions and 35 deletions

View File

@@ -0,0 +1,210 @@
package filecollector
import (
"archive/tar"
"context"
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"strings"
git "github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/filemode"
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
"github.com/go-git/go-git/v5/plumbing/format/index"
)
type Handler interface {
WriteFile(path string, fi fs.FileInfo, linkName string, f io.Reader) error
}
type TarCollector struct {
TarWriter *tar.Writer
UID int
GID int
DstDir string
}
func (tc TarCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error {
// create a new dir/file header
header, err := tar.FileInfoHeader(fi, linkName)
if err != nil {
return err
}
// update the name to correctly reflect the desired destination when untaring
header.Name = path.Join(tc.DstDir, fpath)
header.Mode = int64(fi.Mode())
header.ModTime = fi.ModTime()
header.Uid = tc.UID
header.Gid = tc.GID
// write the header
if err := tc.TarWriter.WriteHeader(header); err != nil {
return err
}
// this is a symlink no reader provided
if f == nil {
return nil
}
// copy file data into tar writer
if _, err := io.Copy(tc.TarWriter, f); err != nil {
return err
}
return nil
}
type CopyCollector struct {
DstDir string
}
func (cc *CopyCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error {
fdestpath := filepath.Join(cc.DstDir, fpath)
if err := os.MkdirAll(filepath.Dir(fdestpath), 0o777); err != nil {
return err
}
if f == nil {
return os.Symlink(linkName, fdestpath)
}
df, err := os.OpenFile(fdestpath, os.O_CREATE|os.O_WRONLY, fi.Mode())
if err != nil {
return err
}
defer df.Close()
if _, err := io.Copy(df, f); err != nil {
return err
}
return nil
}
type FileCollector struct {
Ignorer gitignore.Matcher
SrcPath string
SrcPrefix string
Fs Fs
Handler Handler
}
type Fs interface {
Walk(root string, fn filepath.WalkFunc) error
OpenGitIndex(path string) (*index.Index, error)
Open(path string) (io.ReadCloser, error)
Readlink(path string) (string, error)
}
type DefaultFs struct {
}
func (*DefaultFs) Walk(root string, fn filepath.WalkFunc) error {
return filepath.Walk(root, fn)
}
func (*DefaultFs) OpenGitIndex(path string) (*index.Index, error) {
r, err := git.PlainOpen(path)
if err != nil {
return nil, err
}
i, err := r.Storer.Index()
if err != nil {
return nil, err
}
return i, nil
}
func (*DefaultFs) Open(path string) (io.ReadCloser, error) {
return os.Open(path)
}
func (*DefaultFs) Readlink(path string) (string, error) {
return os.Readlink(path)
}
//nolint:gocyclo
func (fc *FileCollector) CollectFiles(ctx context.Context, submodulePath []string) filepath.WalkFunc {
i, _ := fc.Fs.OpenGitIndex(path.Join(fc.SrcPath, path.Join(submodulePath...)))
return func(file string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
if ctx != nil {
select {
case <-ctx.Done():
return fmt.Errorf("copy cancelled")
default:
}
}
sansPrefix := strings.TrimPrefix(file, fc.SrcPrefix)
split := strings.Split(sansPrefix, string(filepath.Separator))
// The root folders should be skipped, submodules only have the last path component set to "." by filepath.Walk
if fi.IsDir() && len(split) > 0 && split[len(split)-1] == "." {
return nil
}
var entry *index.Entry
if i != nil {
entry, err = i.Entry(strings.Join(split[len(submodulePath):], "/"))
} else {
err = index.ErrEntryNotFound
}
if err != nil && fc.Ignorer != nil && fc.Ignorer.Match(split, fi.IsDir()) {
if fi.IsDir() {
if i != nil {
ms, err := i.Glob(strings.Join(append(split[len(submodulePath):], "**"), "/"))
if err != nil || len(ms) == 0 {
return filepath.SkipDir
}
} else {
return filepath.SkipDir
}
} else {
return nil
}
}
if err == nil && entry.Mode == filemode.Submodule {
err = fc.Fs.Walk(file, fc.CollectFiles(ctx, split))
if err != nil {
return err
}
return filepath.SkipDir
}
path := filepath.ToSlash(sansPrefix)
// return on non-regular files (thanks to [kumo](https://medium.com/@komuw/just-like-you-did-fbdd7df829d3) for this suggested update)
if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
linkName, err := fc.Fs.Readlink(file)
if err != nil {
return fmt.Errorf("unable to readlink '%s': %w", file, err)
}
return fc.Handler.WriteFile(path, fi, linkName, nil)
} else if !fi.Mode().IsRegular() {
return nil
}
// open file
f, err := fc.Fs.Open(file)
if err != nil {
return err
}
defer f.Close()
if ctx != nil {
// make io.Copy cancellable by closing the file
cpctx, cpfinish := context.WithCancel(ctx)
defer cpfinish()
go func() {
select {
case <-cpctx.Done():
case <-ctx.Done():
f.Close()
}
}()
}
return fc.Handler.WriteFile(path, fi, "", f)
}
}

View File

@@ -0,0 +1,172 @@
package filecollector
import (
"archive/tar"
"context"
"io"
"path/filepath"
"strings"
"testing"
"github.com/go-git/go-billy/v5"
"github.com/go-git/go-billy/v5/memfs"
git "github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
"github.com/go-git/go-git/v5/plumbing/format/index"
"github.com/go-git/go-git/v5/storage/filesystem"
"github.com/stretchr/testify/assert"
)
type memoryFs struct {
billy.Filesystem
}
func (mfs *memoryFs) walk(root string, fn filepath.WalkFunc) error {
dir, err := mfs.ReadDir(root)
if err != nil {
return err
}
for i := 0; i < len(dir); i++ {
filename := filepath.Join(root, dir[i].Name())
err = fn(filename, dir[i], nil)
if dir[i].IsDir() {
if err == filepath.SkipDir {
err = nil
} else if err := mfs.walk(filename, fn); err != nil {
return err
}
}
if err != nil {
return err
}
}
return nil
}
func (mfs *memoryFs) Walk(root string, fn filepath.WalkFunc) error {
stat, err := mfs.Lstat(root)
if err != nil {
return err
}
err = fn(strings.Join([]string{root, "."}, string(filepath.Separator)), stat, nil)
if err != nil {
return err
}
return mfs.walk(root, fn)
}
func (mfs *memoryFs) OpenGitIndex(path string) (*index.Index, error) {
f, _ := mfs.Filesystem.Chroot(filepath.Join(path, ".git"))
storage := filesystem.NewStorage(f, cache.NewObjectLRUDefault())
i, err := storage.Index()
if err != nil {
return nil, err
}
return i, nil
}
func (mfs *memoryFs) Open(path string) (io.ReadCloser, error) {
return mfs.Filesystem.Open(path)
}
func (mfs *memoryFs) Readlink(path string) (string, error) {
return mfs.Filesystem.Readlink(path)
}
func TestIgnoredTrackedfile(t *testing.T) {
fs := memfs.New()
_ = fs.MkdirAll("mygitrepo/.git", 0o777)
dotgit, _ := fs.Chroot("mygitrepo/.git")
worktree, _ := fs.Chroot("mygitrepo")
repo, _ := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree)
f, _ := worktree.Create(".gitignore")
_, _ = f.Write([]byte(".*\n"))
f.Close()
// This file shouldn't be in the tar
f, _ = worktree.Create(".env")
_, _ = f.Write([]byte("test=val1\n"))
f.Close()
w, _ := repo.Worktree()
// .gitignore is in the tar after adding it to the index
_, _ = w.Add(".gitignore")
tmpTar, _ := fs.Create("temp.tar")
tw := tar.NewWriter(tmpTar)
ps, _ := gitignore.ReadPatterns(worktree, []string{})
ignorer := gitignore.NewMatcher(ps)
fc := &FileCollector{
Fs: &memoryFs{Filesystem: fs},
Ignorer: ignorer,
SrcPath: "mygitrepo",
SrcPrefix: "mygitrepo" + string(filepath.Separator),
Handler: &TarCollector{
TarWriter: tw,
},
}
err := fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{}))
assert.NoError(t, err, "successfully collect files")
tw.Close()
_, _ = tmpTar.Seek(0, io.SeekStart)
tr := tar.NewReader(tmpTar)
h, err := tr.Next()
assert.NoError(t, err, "tar must not be empty")
assert.Equal(t, ".gitignore", h.Name)
_, err = tr.Next()
assert.ErrorIs(t, err, io.EOF, "tar must only contain one element")
}
func TestSymlinks(t *testing.T) {
fs := memfs.New()
_ = fs.MkdirAll("mygitrepo/.git", 0o777)
dotgit, _ := fs.Chroot("mygitrepo/.git")
worktree, _ := fs.Chroot("mygitrepo")
repo, _ := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree)
// This file shouldn't be in the tar
f, err := worktree.Create(".env")
assert.NoError(t, err)
_, err = f.Write([]byte("test=val1\n"))
assert.NoError(t, err)
f.Close()
err = worktree.Symlink(".env", "test.env")
assert.NoError(t, err)
w, err := repo.Worktree()
assert.NoError(t, err)
// .gitignore is in the tar after adding it to the index
_, err = w.Add(".env")
assert.NoError(t, err)
_, err = w.Add("test.env")
assert.NoError(t, err)
tmpTar, _ := fs.Create("temp.tar")
tw := tar.NewWriter(tmpTar)
ps, _ := gitignore.ReadPatterns(worktree, []string{})
ignorer := gitignore.NewMatcher(ps)
fc := &FileCollector{
Fs: &memoryFs{Filesystem: fs},
Ignorer: ignorer,
SrcPath: "mygitrepo",
SrcPrefix: "mygitrepo" + string(filepath.Separator),
Handler: &TarCollector{
TarWriter: tw,
},
}
err = fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{}))
assert.NoError(t, err, "successfully collect files")
tw.Close()
_, _ = tmpTar.Seek(0, io.SeekStart)
tr := tar.NewReader(tmpTar)
h, err := tr.Next()
files := map[string]tar.Header{}
for err == nil {
files[h.Name] = *h
h, err = tr.Next()
}
assert.Equal(t, ".env", files[".env"].Name)
assert.Equal(t, "test.env", files["test.env"].Name)
assert.Equal(t, ".env", files["test.env"].Linkname)
assert.ErrorIs(t, err, io.EOF, "tar must be read cleanly to EOF")
}