Skip to content

Commit 4426598

Browse files
committed
feat: file filter proposal + secrets [PS-105]
1 parent 4d084c5 commit 4426598

File tree

6 files changed

+488
-169
lines changed

6 files changed

+488
-169
lines changed
Lines changed: 152 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,152 @@
1+
package file_filter
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"github.com/rs/zerolog"
7+
"golang.org/x/sync/semaphore"
8+
"io/fs"
9+
"path/filepath"
10+
"runtime"
11+
)
12+
13+
// Filterable is a single filtering strategy consulted by FileFilter.
type Filterable interface {
	// Filter reports whether path should be excluded: GetFilteredFiles drops
	// a file as soon as any strategy returns true for it.
	Filter(path string) bool
}
16+
17+
type FileFilter struct {
18+
path string
19+
logger *zerolog.Logger
20+
filterStrategies []Filterable
21+
max_threads int64
22+
}
23+
24+
// FileFilterOption mutates a FileFilter while it is being built by
// NewFileFilter. A non-nil error signals that the option could not be applied.
type FileFilterOption func(*FileFilter) error
25+
26+
func WithThreadNumber(maxThreadCount int) FileFilterOption {
27+
return func(filter *FileFilter) error {
28+
if maxThreadCount > 0 {
29+
filter.max_threads = int64(maxThreadCount)
30+
return nil
31+
}
32+
33+
return fmt.Errorf("max thread count must be greater than 0")
34+
}
35+
}
36+
37+
func WithFileFilterStrategies(strategies []Filterable) FileFilterOption {
38+
return func(filter *FileFilter) error {
39+
filter.filterStrategies = append(filter.filterStrategies, strategies...)
40+
return nil
41+
}
42+
}
43+
44+
func WithSecretsFileFilter(path string, logger *zerolog.Logger) FileFilterOption {
45+
return func(filter *FileFilter) error {
46+
secretsFilter, err := NewSecretsFileFilter(path, logger)
47+
if err != nil {
48+
return fmt.Errorf("error creating secrets filter: %w", err)
49+
50+
}
51+
filter.filterStrategies = append(filter.filterStrategies, secretsFilter...)
52+
return nil
53+
}
54+
}
55+
56+
func WithDefaultRulesFilter() FileFilterOption {
57+
return func(filter *FileFilter) error {
58+
defaultFilter, err := NewIgnoresFileFilterFromGlobs([]string{"**/.git/**"})
59+
if err != nil {
60+
return fmt.Errorf("error creating default filter: %w", err)
61+
62+
}
63+
64+
filter.filterStrategies = append(filter.filterStrategies, defaultFilter)
65+
return nil
66+
}
67+
}
68+
69+
func NewFileFilter(path string, logger *zerolog.Logger, options ...FileFilterOption) *FileFilter {
70+
filter := &FileFilter{
71+
path: path,
72+
logger: logger,
73+
max_threads: int64(runtime.NumCPU()),
74+
}
75+
76+
for _, option := range options {
77+
err := option(filter)
78+
if err != nil {
79+
logger.Err(err).Msg("failed to apply option for FileFilter")
80+
}
81+
}
82+
83+
return filter
84+
}
85+
86+
// GetAllFiles traverses a given dir path and fetches all filesToFilter in the directory
87+
func (fw *FileFilter) GetAllFiles() chan string {
88+
var filesCh = make(chan string)
89+
go func() {
90+
defer close(filesCh)
91+
92+
err := filepath.WalkDir(fw.path, func(path string, d fs.DirEntry, err error) error {
93+
if err != nil {
94+
return err
95+
}
96+
97+
if !d.IsDir() {
98+
filesCh <- path
99+
}
100+
101+
return err
102+
})
103+
if err != nil {
104+
fw.logger.Error().Msgf("walk dir failed: %v", err)
105+
}
106+
}()
107+
108+
return filesCh
109+
}
110+
111+
// GetFilteredFiles returns a filtered channel of filepaths from a given channel of filespaths and glob patterns to filter on
112+
func (fw *FileFilter) GetFilteredFiles(filesCh chan string) chan string {
113+
var filteredFilesCh = make(chan string)
114+
115+
go func() {
116+
ctx := context.Background()
117+
availableThreads := semaphore.NewWeighted(fw.max_threads)
118+
119+
defer close(filteredFilesCh)
120+
121+
// iterate the filesToFilter channel
122+
for file := range filesCh {
123+
err := availableThreads.Acquire(ctx, 1)
124+
if err != nil {
125+
fw.logger.Err(err).Msg("failed to limit threads")
126+
}
127+
go func(f string) {
128+
defer availableThreads.Release(1)
129+
// filesToFilter that do not match the filter list are excluded
130+
keepFile := true
131+
for _, filter := range fw.filterStrategies {
132+
if filter.Filter(f) {
133+
keepFile = false
134+
break
135+
}
136+
}
137+
138+
if keepFile {
139+
filteredFilesCh <- f
140+
}
141+
}(file)
142+
}
143+
144+
// wait until the last thread is done
145+
err := availableThreads.Acquire(ctx, fw.max_threads)
146+
if err != nil {
147+
fw.logger.Err(err).Msg("failed to wait for all threads")
148+
}
149+
}()
150+
151+
return filteredFilesCh
152+
}
Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
1-
package utils
1+
package file_filter
22

33
import (
44
"fmt"
5+
"github.com/snyk/go-application-framework/pkg/utils"
56
"github.com/stretchr/testify/require"
67
"os"
78
"path/filepath"
@@ -134,10 +135,10 @@ func TestFileFilter_GetFilteredFiles(t *testing.T) {
134135
t.Run(testCase.name, func(t *testing.T) {
135136
setupTestFileSystem(t, testCase)
136137

137-
globFileFilter, err := NewGlobFileFilter(testCase.repoPath, testCase.ruleFiles, &log.Logger)
138+
globFileFilter, err := NewIgnoresFileFilter(testCase.repoPath, testCase.ruleFiles, &log.Logger)
138139
require.NoError(t, err)
139140

140-
fileFilter := NewFileFilter(testCase.repoPath, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter}))
141+
fileFilter := NewFileFilter(testCase.repoPath, &log.Logger, WithFileFilterStrategies([]Filterable{globFileFilter}))
141142
files := fileFilter.GetAllFiles()
142143

143144
filteredFiles := fileFilter.GetFilteredFiles(files)
@@ -184,10 +185,10 @@ func BenchmarkFileFilter_GetFilteredFiles(b *testing.B) {
184185

185186
b.ResetTimer()
186187
for n := 0; n < b.N; n++ {
187-
globFileFilter, err := NewGlobFileFilter(rootDir, ruleFiles, &log.Logger)
188+
globFileFilter, err := NewIgnoresFileFilter(rootDir, ruleFiles, &log.Logger)
188189
assert.NoError(b, err)
189190

190-
fileFilter := NewFileFilter(rootDir, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter}), WithThreadNumber(runtime.NumCPU()))
191+
fileFilter := NewFileFilter(rootDir, &log.Logger, WithFileFilterStrategies([]Filterable{globFileFilter}), WithThreadNumber(runtime.NumCPU()))
191192

192193
b.StartTimer()
193194
filteredFiles := fileFilter.GetFilteredFiles(fileFilter.GetAllFiles())
@@ -444,7 +445,7 @@ func TestParseIgnoreRuleToGlobs(t *testing.T) {
444445

445446
for _, tc := range testCases {
446447
t.Run(tc.name, func(t *testing.T) {
447-
globs := parseIgnoreRuleToGlobs(tc.rule, tc.baseDir)
448+
globs := utils.parseIgnoreRuleToGlobs(tc.rule, tc.baseDir)
448449
assert.ElementsMatch(t, tc.expectedGlobs, globs,
449450
"Rule: %q, Expected: %v, Got: %v", tc.rule, tc.expectedGlobs, globs)
450451
})
@@ -473,10 +474,10 @@ func TestFileFilter_SlashPatternInGitIgnore(t *testing.T) {
473474
createFileInPath(t, gitignorePath, []byte("/"))
474475

475476
// Test file filtering
476-
globFileFilter, err := NewGlobFileFilter(tempDir, []string{".gitignore"}, &log.Logger)
477+
globFileFilter, err := NewIgnoresFileFilter(tempDir, []string{".gitignore"}, &log.Logger)
477478
assert.NoError(t, err)
478479

479-
fileFilter := NewFileFilter(tempDir, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter}))
480+
fileFilter := NewFileFilter(tempDir, &log.Logger, WithFileFilterStrategies([]Filterable{globFileFilter}))
480481
rules, err := fileFilter.GetRules([]string{".gitignore"})
481482
assert.NoError(t, err)
482483

@@ -521,10 +522,10 @@ func TestFileFilter_SlashPatternInGitIgnore(t *testing.T) {
521522
createFileInPath(t, gitignorePath, []byte("/*"))
522523

523524
// Test file filtering
524-
globFileFilter, err := NewGlobFileFilter(tempDir, []string{".gitignore"}, &log.Logger)
525+
globFileFilter, err := NewIgnoresFileFilter(tempDir, []string{".gitignore"}, &log.Logger)
525526
assert.NoError(t, err)
526527

527-
fileFilter := NewFileFilter(tempDir, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter}))
528+
fileFilter := NewFileFilter(tempDir, &log.Logger, WithFileFilterStrategies([]Filterable{globFileFilter}))
528529

529530
// Get all files and filter them
530531
allFiles := fileFilter.GetAllFiles()

0 commit comments

Comments
 (0)