Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pkg/local_workflows/code_workflow/native_workflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,13 +305,13 @@ func determineAnalyzeInput(path string, config configuration.Configuration, logg

// getFilesForPath returns a channel that emits every file under path that is not excluded by the ignore rules
func getFilesForPath(path string, logger *zerolog.Logger, max_threads int) (<-chan string, error) {
filter := utils.NewFileFilter(path, logger, utils.WithThreadNumber(max_threads))
rules, err := filter.GetRules([]string{".gitignore", ".dcignore", ".snyk"})
f, err := utils.NewIgnoresFileFilterFromIgnoreFiles(path, []string{".gitignore", ".dcignore", ".snyk"}, logger)
if err != nil {
return nil, err
}

results := filter.GetFilteredFiles(filter.GetAllFiles(), rules)
filter := utils.NewFileFilter(path, logger, utils.WithFileFilterStrategies([]utils.Filterable{f}), utils.WithThreadNumber(max_threads))
results := filter.GetFilteredFiles()
return results, nil
}

Expand Down
179 changes: 118 additions & 61 deletions pkg/utils/file_filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@ package utils
import (
"context"
"fmt"
"github.com/rs/zerolog"
gitignore "github.com/sabhiram/go-gitignore"
"golang.org/x/sync/semaphore"
"gopkg.in/yaml.v3"
"io/fs"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/rs/zerolog"
gitignore "github.com/sabhiram/go-gitignore"
"golang.org/x/sync/semaphore"
"gopkg.in/yaml.v3"
)

// Filterable is a strategy that decides whether a file path should be filtered out.
type Filterable interface {
// Filter reports whether path should be excluded; FileFilter.GetFilteredFiles
// drops a file as soon as one strategy returns true for it.
Filter(path string) bool
}

type FileFilter struct {
path string
defaultRules []string
logger *zerolog.Logger
max_threads int64
path string
logger *zerolog.Logger
FilterStrategies []Filterable
max_threads int64
}

// FileFilterOption is a functional option passed to NewFileFilter to configure
// a FileFilter; it returns an error when the option cannot be set up.
type FileFilterOption func(*FileFilter) error
Expand All @@ -35,12 +38,31 @@ func WithThreadNumber(maxThreadCount int) FileFilterOption {
}
}

// WithFileFilterStrategies returns an option that appends the given strategies
// to the filter's existing FilterStrategies.
func WithFileFilterStrategies(strategies []Filterable) FileFilterOption {
	appendStrategies := func(ff *FileFilter) error {
		ff.FilterStrategies = append(ff.FilterStrategies, strategies...)
		return nil
	}
	return appendStrategies
}

// WithDefaultRulesFilter returns an option that adds the default ignore
// strategy to the filter, excluding everything matched by "**/.git/**".
func WithDefaultRulesFilter() FileFilterOption {
	return func(ff *FileFilter) error {
		gitDirFilter, err := NewIgnoresFileFilterFromGlobs([]string{"**/.git/**"})
		if err != nil {
			return fmt.Errorf("error creating default filter: %w", err)
		}
		ff.FilterStrategies = append(ff.FilterStrategies, gitDirFilter)
		return nil
	}
}

func NewFileFilter(path string, logger *zerolog.Logger, options ...FileFilterOption) *FileFilter {
filter := &FileFilter{
path: path,
defaultRules: []string{"**/.git/**"},
logger: logger,
max_threads: int64(runtime.NumCPU()),
path: path,
logger: logger,
max_threads: int64(runtime.NumCPU()),
}

for _, option := range options {
Expand All @@ -53,35 +75,82 @@ func NewFileFilter(path string, logger *zerolog.Logger, options ...FileFilterOpt
return filter
}

// GetAllFiles traverses a given dir path and fetches all filesToFilter in the directory
func (fw *FileFilter) GetAllFiles() chan string {
var filesCh = make(chan string)
// GetFilteredFiles walks fw.path and returns a channel of the file paths that are not excluded by any of the configured filter strategies
func (fw *FileFilter) GetFilteredFiles() chan string {
filesCh := getAllFiles(fw.path, fw.logger)

var filteredFilesCh = make(chan string)
go func() {
defer close(filesCh)
ctx := context.Background()
availableThreads := semaphore.NewWeighted(fw.max_threads)

err := filepath.WalkDir(fw.path, func(path string, d fs.DirEntry, err error) error {
defer close(filteredFilesCh)

// iterate the filesToFilter channel
for file := range filesCh {
err := availableThreads.Acquire(ctx, 1)
if err != nil {
return err
fw.logger.Err(err).Msg("failed to limit threads")
}
go func(f string) {
defer availableThreads.Release(1)
// files that match any filter strategy are excluded; all others are kept
keepFile := true
for _, filter := range fw.FilterStrategies {
if filter.Filter(f) {
keepFile = false
break
}
}

if !d.IsDir() {
filesCh <- path
}
if keepFile {
filteredFilesCh <- f
}
}(file)
}

return err
})
// wait until the last thread is done
err := availableThreads.Acquire(ctx, fw.max_threads)
if err != nil {
fw.logger.Error().Msgf("walk dir failed: %v", err)
fw.logger.Err(err).Msg("failed to wait for all threads")
}
}()

return filesCh
return filteredFilesCh
}

// GetRules builds a list of glob patterns that can be used to filter filesToFilter
func (fw *FileFilter) GetRules(ruleFiles []string) ([]string, error) {
files := fw.GetAllFiles()
// Default file filter for gitignore-like ignore rules (refactored implementation)

// IgnoresFileFilter is a Filterable backed by compiled gitignore-style rules
// (e.g. loaded from .gitignore or .snyk files, or from a list of globs).
type IgnoresFileFilter struct {
// ignores holds the compiled ignore patterns; Filter matches paths against it.
ignores *gitignore.GitIgnore
}

// NewIgnoresFileFilterFromIgnoreFiles builds an IgnoresFileFilter for the tree
// rooted at path. All files below path are enumerated and handed to getRules
// together with ignoresFiles to collect the glob rules, which are then compiled
// into a single matcher. An error from getRules is returned unchanged.
func NewIgnoresFileFilterFromIgnoreFiles(path string, ignoresFiles []string, logger *zerolog.Logger) (*IgnoresFileFilter, error) {
	globs, err := getRules(getAllFiles(path, logger), ignoresFiles, logger)
	if err != nil {
		return nil, err
	}

	matcher := gitignore.CompileIgnoreLines(globs...)
	return &IgnoresFileFilter{ignores: matcher}, nil
}

// NewIgnoresFileFilterFromGlobs builds an IgnoresFileFilter directly from a
// list of gitignore-style glob patterns. The returned error is always nil.
func NewIgnoresFileFilterFromGlobs(globs []string) (*IgnoresFileFilter, error) {
	compiled := gitignore.CompileIgnoreLines(globs...)
	filter := &IgnoresFileFilter{ignores: compiled}
	return filter, nil
}

// Filter reports whether path matches the compiled ignore rules. A filter
// without compiled rules matches nothing.
func (ff *IgnoresFileFilter) Filter(path string) bool {
	return ff.ignores != nil && ff.ignores.MatchesPath(path)
}

// getRules builds the list of glob patterns used to filter files: the default rules followed by the patterns extracted from the ignore files
func getRules(files chan string, ruleFiles []string, logger *zerolog.Logger) ([]string, error) {
defaultRules := []string{"**/.git/**"}
// iterate filesToFilter channel and find ignore filesToFilter
var ignoreFiles = make([]string, 0)
for file := range files {
Expand All @@ -94,53 +163,41 @@ func (fw *FileFilter) GetRules(ruleFiles []string) ([]string, error) {
}

// iterate ignore filesToFilter and extract glob patterns
globs, err := fw.buildGlobs(ignoreFiles)
globs, err := buildGlobs(ignoreFiles, logger)
if err != nil {
return nil, err
}

return append(fw.defaultRules, globs...), nil
return append(defaultRules, globs...), nil
}

// GetFilteredFiles returns a filtered channel of filepaths from a given channel of filespaths and glob patterns to filter on
func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan string {
var filteredFilesCh = make(chan string)

// create pattern matcher used to match filesToFilter to glob patterns
globPatternMatcher := gitignore.CompileIgnoreLines(globs...)
// getAllFiles walks the directory tree rooted at path and sends every non-directory entry to the returned channel, which is closed when the walk ends
func getAllFiles(path string, logger *zerolog.Logger) chan string {
var filesCh = make(chan string)
go func() {
ctx := context.Background()
availableThreads := semaphore.NewWeighted(fw.max_threads)

defer close(filteredFilesCh)
defer close(filesCh)

// iterate the filesToFilter channel
for file := range filesCh {
err := availableThreads.Acquire(ctx, 1)
err := filepath.WalkDir(path, func(path string, d fs.DirEntry, err error) error {
if err != nil {
fw.logger.Err(err).Msg("failed to limit threads")
return err
}
go func(f string) {
defer availableThreads.Release(1)
// filesToFilter that do not match the glob pattern are filtered
if !globPatternMatcher.MatchesPath(f) {
filteredFilesCh <- f
}
}(file)
}

// wait until the last thread is done
err := availableThreads.Acquire(ctx, fw.max_threads)
if !d.IsDir() {
filesCh <- path
}

return err
})
if err != nil {
fw.logger.Err(err).Msg("failed to wait for all threads")
logger.Error().Msgf("walk dir failed: %v", err)
}
}()

return filteredFilesCh
return filesCh
}

// buildGlobs iterates a list of ignore files and returns a list of glob patterns that can be used to test for ignored files
func (fw *FileFilter) buildGlobs(ignoreFiles []string) ([]string, error) {
func buildGlobs(ignoreFiles []string, logger *zerolog.Logger) ([]string, error) {
var globs = make([]string, 0)
for _, ignoreFile := range ignoreFiles {
var content []byte
Expand All @@ -150,7 +207,7 @@ func (fw *FileFilter) buildGlobs(ignoreFiles []string) ([]string, error) {
}

if filepath.Base(ignoreFile) == ".snyk" { // .snyk files are yaml files and should be parsed differently
parsedRules := fw.parseDotSnykFile(content, filepath.Dir(ignoreFile))
parsedRules := parseDotSnykFile(content, filepath.Dir(ignoreFile), logger)
globs = append(globs, parsedRules...)
} else { // .gitignore, .dcignore, etc. are just a list of ignore rules
parsedRules := parseIgnoreFile(content, filepath.Dir(ignoreFile))
Expand All @@ -162,7 +219,7 @@ func (fw *FileFilter) buildGlobs(ignoreFiles []string) ([]string, error) {
}

// parseDotSnykFile builds a list of glob patterns from a given .snyk style file
func (fw *FileFilter) parseDotSnykFile(content []byte, filePath string) []string {
func parseDotSnykFile(content []byte, filePath string, logger *zerolog.Logger) []string {
type DotSnykRules struct {
Exclude struct {
Code []string `yaml:"code"`
Expand All @@ -173,7 +230,7 @@ func (fw *FileFilter) parseDotSnykFile(content []byte, filePath string) []string
var rules DotSnykRules
err := yaml.Unmarshal(content, &rules)
if err != nil {
fw.logger.Error().Msgf("parse .snyk failed: %v", err)
logger.Error().Msgf("parse .snyk failed: %v", err)
return nil
}

Expand Down
Loading