refactor: filecollector into new package (#2174)
* refactor: filecollector into new package * Add test for symlinks * add test fix bug of GetContainerArchive * add test data
This commit is contained in:
210
pkg/filecollector/file_collector.go
Normal file
210
pkg/filecollector/file_collector.go
Normal file
@@ -0,0 +1,210 @@
|
||||
package filecollector
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
git "github.com/go-git/go-git/v5"
|
||||
"github.com/go-git/go-git/v5/plumbing/filemode"
|
||||
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
|
||||
"github.com/go-git/go-git/v5/plumbing/format/index"
|
||||
)
|
||||
|
||||
type Handler interface {
|
||||
WriteFile(path string, fi fs.FileInfo, linkName string, f io.Reader) error
|
||||
}
|
||||
|
||||
type TarCollector struct {
|
||||
TarWriter *tar.Writer
|
||||
UID int
|
||||
GID int
|
||||
DstDir string
|
||||
}
|
||||
|
||||
func (tc TarCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error {
|
||||
// create a new dir/file header
|
||||
header, err := tar.FileInfoHeader(fi, linkName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// update the name to correctly reflect the desired destination when untaring
|
||||
header.Name = path.Join(tc.DstDir, fpath)
|
||||
header.Mode = int64(fi.Mode())
|
||||
header.ModTime = fi.ModTime()
|
||||
header.Uid = tc.UID
|
||||
header.Gid = tc.GID
|
||||
|
||||
// write the header
|
||||
if err := tc.TarWriter.WriteHeader(header); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// this is a symlink no reader provided
|
||||
if f == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// copy file data into tar writer
|
||||
if _, err := io.Copy(tc.TarWriter, f); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type CopyCollector struct {
|
||||
DstDir string
|
||||
}
|
||||
|
||||
func (cc *CopyCollector) WriteFile(fpath string, fi fs.FileInfo, linkName string, f io.Reader) error {
|
||||
fdestpath := filepath.Join(cc.DstDir, fpath)
|
||||
if err := os.MkdirAll(filepath.Dir(fdestpath), 0o777); err != nil {
|
||||
return err
|
||||
}
|
||||
if f == nil {
|
||||
return os.Symlink(linkName, fdestpath)
|
||||
}
|
||||
df, err := os.OpenFile(fdestpath, os.O_CREATE|os.O_WRONLY, fi.Mode())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer df.Close()
|
||||
if _, err := io.Copy(df, f); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type FileCollector struct {
|
||||
Ignorer gitignore.Matcher
|
||||
SrcPath string
|
||||
SrcPrefix string
|
||||
Fs Fs
|
||||
Handler Handler
|
||||
}
|
||||
|
||||
type Fs interface {
|
||||
Walk(root string, fn filepath.WalkFunc) error
|
||||
OpenGitIndex(path string) (*index.Index, error)
|
||||
Open(path string) (io.ReadCloser, error)
|
||||
Readlink(path string) (string, error)
|
||||
}
|
||||
|
||||
type DefaultFs struct {
|
||||
}
|
||||
|
||||
func (*DefaultFs) Walk(root string, fn filepath.WalkFunc) error {
|
||||
return filepath.Walk(root, fn)
|
||||
}
|
||||
|
||||
func (*DefaultFs) OpenGitIndex(path string) (*index.Index, error) {
|
||||
r, err := git.PlainOpen(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
i, err := r.Storer.Index()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func (*DefaultFs) Open(path string) (io.ReadCloser, error) {
|
||||
return os.Open(path)
|
||||
}
|
||||
|
||||
func (*DefaultFs) Readlink(path string) (string, error) {
|
||||
return os.Readlink(path)
|
||||
}
|
||||
|
||||
//nolint:gocyclo
|
||||
func (fc *FileCollector) CollectFiles(ctx context.Context, submodulePath []string) filepath.WalkFunc {
|
||||
i, _ := fc.Fs.OpenGitIndex(path.Join(fc.SrcPath, path.Join(submodulePath...)))
|
||||
return func(file string, fi os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ctx != nil {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("copy cancelled")
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
sansPrefix := strings.TrimPrefix(file, fc.SrcPrefix)
|
||||
split := strings.Split(sansPrefix, string(filepath.Separator))
|
||||
// The root folders should be skipped, submodules only have the last path component set to "." by filepath.Walk
|
||||
if fi.IsDir() && len(split) > 0 && split[len(split)-1] == "." {
|
||||
return nil
|
||||
}
|
||||
var entry *index.Entry
|
||||
if i != nil {
|
||||
entry, err = i.Entry(strings.Join(split[len(submodulePath):], "/"))
|
||||
} else {
|
||||
err = index.ErrEntryNotFound
|
||||
}
|
||||
if err != nil && fc.Ignorer != nil && fc.Ignorer.Match(split, fi.IsDir()) {
|
||||
if fi.IsDir() {
|
||||
if i != nil {
|
||||
ms, err := i.Glob(strings.Join(append(split[len(submodulePath):], "**"), "/"))
|
||||
if err != nil || len(ms) == 0 {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
} else {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if err == nil && entry.Mode == filemode.Submodule {
|
||||
err = fc.Fs.Walk(file, fc.CollectFiles(ctx, split))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return filepath.SkipDir
|
||||
}
|
||||
path := filepath.ToSlash(sansPrefix)
|
||||
|
||||
// return on non-regular files (thanks to [kumo](https://medium.com/@komuw/just-like-you-did-fbdd7df829d3) for this suggested update)
|
||||
if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
|
||||
linkName, err := fc.Fs.Readlink(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to readlink '%s': %w", file, err)
|
||||
}
|
||||
return fc.Handler.WriteFile(path, fi, linkName, nil)
|
||||
} else if !fi.Mode().IsRegular() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// open file
|
||||
f, err := fc.Fs.Open(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if ctx != nil {
|
||||
// make io.Copy cancellable by closing the file
|
||||
cpctx, cpfinish := context.WithCancel(ctx)
|
||||
defer cpfinish()
|
||||
go func() {
|
||||
select {
|
||||
case <-cpctx.Done():
|
||||
case <-ctx.Done():
|
||||
f.Close()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
return fc.Handler.WriteFile(path, fi, "", f)
|
||||
}
|
||||
}
|
172
pkg/filecollector/file_collector_test.go
Normal file
172
pkg/filecollector/file_collector_test.go
Normal file
@@ -0,0 +1,172 @@
|
||||
package filecollector
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"context"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-git/go-billy/v5"
|
||||
"github.com/go-git/go-billy/v5/memfs"
|
||||
git "github.com/go-git/go-git/v5"
|
||||
"github.com/go-git/go-git/v5/plumbing/cache"
|
||||
"github.com/go-git/go-git/v5/plumbing/format/gitignore"
|
||||
"github.com/go-git/go-git/v5/plumbing/format/index"
|
||||
"github.com/go-git/go-git/v5/storage/filesystem"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
type memoryFs struct {
|
||||
billy.Filesystem
|
||||
}
|
||||
|
||||
func (mfs *memoryFs) walk(root string, fn filepath.WalkFunc) error {
|
||||
dir, err := mfs.ReadDir(root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i := 0; i < len(dir); i++ {
|
||||
filename := filepath.Join(root, dir[i].Name())
|
||||
err = fn(filename, dir[i], nil)
|
||||
if dir[i].IsDir() {
|
||||
if err == filepath.SkipDir {
|
||||
err = nil
|
||||
} else if err := mfs.walk(filename, fn); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mfs *memoryFs) Walk(root string, fn filepath.WalkFunc) error {
|
||||
stat, err := mfs.Lstat(root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = fn(strings.Join([]string{root, "."}, string(filepath.Separator)), stat, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return mfs.walk(root, fn)
|
||||
}
|
||||
|
||||
func (mfs *memoryFs) OpenGitIndex(path string) (*index.Index, error) {
|
||||
f, _ := mfs.Filesystem.Chroot(filepath.Join(path, ".git"))
|
||||
storage := filesystem.NewStorage(f, cache.NewObjectLRUDefault())
|
||||
i, err := storage.Index()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func (mfs *memoryFs) Open(path string) (io.ReadCloser, error) {
|
||||
return mfs.Filesystem.Open(path)
|
||||
}
|
||||
|
||||
func (mfs *memoryFs) Readlink(path string) (string, error) {
|
||||
return mfs.Filesystem.Readlink(path)
|
||||
}
|
||||
|
||||
func TestIgnoredTrackedfile(t *testing.T) {
|
||||
fs := memfs.New()
|
||||
_ = fs.MkdirAll("mygitrepo/.git", 0o777)
|
||||
dotgit, _ := fs.Chroot("mygitrepo/.git")
|
||||
worktree, _ := fs.Chroot("mygitrepo")
|
||||
repo, _ := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree)
|
||||
f, _ := worktree.Create(".gitignore")
|
||||
_, _ = f.Write([]byte(".*\n"))
|
||||
f.Close()
|
||||
// This file shouldn't be in the tar
|
||||
f, _ = worktree.Create(".env")
|
||||
_, _ = f.Write([]byte("test=val1\n"))
|
||||
f.Close()
|
||||
w, _ := repo.Worktree()
|
||||
// .gitignore is in the tar after adding it to the index
|
||||
_, _ = w.Add(".gitignore")
|
||||
|
||||
tmpTar, _ := fs.Create("temp.tar")
|
||||
tw := tar.NewWriter(tmpTar)
|
||||
ps, _ := gitignore.ReadPatterns(worktree, []string{})
|
||||
ignorer := gitignore.NewMatcher(ps)
|
||||
fc := &FileCollector{
|
||||
Fs: &memoryFs{Filesystem: fs},
|
||||
Ignorer: ignorer,
|
||||
SrcPath: "mygitrepo",
|
||||
SrcPrefix: "mygitrepo" + string(filepath.Separator),
|
||||
Handler: &TarCollector{
|
||||
TarWriter: tw,
|
||||
},
|
||||
}
|
||||
err := fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{}))
|
||||
assert.NoError(t, err, "successfully collect files")
|
||||
tw.Close()
|
||||
_, _ = tmpTar.Seek(0, io.SeekStart)
|
||||
tr := tar.NewReader(tmpTar)
|
||||
h, err := tr.Next()
|
||||
assert.NoError(t, err, "tar must not be empty")
|
||||
assert.Equal(t, ".gitignore", h.Name)
|
||||
_, err = tr.Next()
|
||||
assert.ErrorIs(t, err, io.EOF, "tar must only contain one element")
|
||||
}
|
||||
|
||||
func TestSymlinks(t *testing.T) {
|
||||
fs := memfs.New()
|
||||
_ = fs.MkdirAll("mygitrepo/.git", 0o777)
|
||||
dotgit, _ := fs.Chroot("mygitrepo/.git")
|
||||
worktree, _ := fs.Chroot("mygitrepo")
|
||||
repo, _ := git.Init(filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()), worktree)
|
||||
// This file shouldn't be in the tar
|
||||
f, err := worktree.Create(".env")
|
||||
assert.NoError(t, err)
|
||||
_, err = f.Write([]byte("test=val1\n"))
|
||||
assert.NoError(t, err)
|
||||
f.Close()
|
||||
err = worktree.Symlink(".env", "test.env")
|
||||
assert.NoError(t, err)
|
||||
|
||||
w, err := repo.Worktree()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// .gitignore is in the tar after adding it to the index
|
||||
_, err = w.Add(".env")
|
||||
assert.NoError(t, err)
|
||||
_, err = w.Add("test.env")
|
||||
assert.NoError(t, err)
|
||||
|
||||
tmpTar, _ := fs.Create("temp.tar")
|
||||
tw := tar.NewWriter(tmpTar)
|
||||
ps, _ := gitignore.ReadPatterns(worktree, []string{})
|
||||
ignorer := gitignore.NewMatcher(ps)
|
||||
fc := &FileCollector{
|
||||
Fs: &memoryFs{Filesystem: fs},
|
||||
Ignorer: ignorer,
|
||||
SrcPath: "mygitrepo",
|
||||
SrcPrefix: "mygitrepo" + string(filepath.Separator),
|
||||
Handler: &TarCollector{
|
||||
TarWriter: tw,
|
||||
},
|
||||
}
|
||||
err = fc.Fs.Walk("mygitrepo", fc.CollectFiles(context.Background(), []string{}))
|
||||
assert.NoError(t, err, "successfully collect files")
|
||||
tw.Close()
|
||||
_, _ = tmpTar.Seek(0, io.SeekStart)
|
||||
tr := tar.NewReader(tmpTar)
|
||||
h, err := tr.Next()
|
||||
files := map[string]tar.Header{}
|
||||
for err == nil {
|
||||
files[h.Name] = *h
|
||||
h, err = tr.Next()
|
||||
}
|
||||
|
||||
assert.Equal(t, ".env", files[".env"].Name)
|
||||
assert.Equal(t, "test.env", files["test.env"].Name)
|
||||
assert.Equal(t, ".env", files["test.env"].Linkname)
|
||||
assert.ErrorIs(t, err, io.EOF, "tar must be read cleanly to EOF")
|
||||
}
|
Reference in New Issue
Block a user