-
Couldn't load subscription status.
- Fork 2.1k
Issue #1876 Implement line tracking for filesystem chunks in handleChunksWithError #4491
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ package handlers | |
| import ( | ||
| "archive/zip" | ||
| "bufio" | ||
| "bytes" | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
|
|
@@ -11,6 +12,7 @@ import ( | |
|
|
||
| "github.com/gabriel-vasile/mimetype" | ||
| "github.com/mholt/archives" | ||
| "google.golang.org/protobuf/proto" | ||
|
|
||
| logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" | ||
| "github.com/trufflesecurity/trufflehog/v3/pkg/feature" | ||
|
|
@@ -405,6 +407,8 @@ func handleChunksWithError( | |
| chunkSkel *sources.Chunk, | ||
| reporter sources.ChunkReporter, | ||
| ) error { | ||
| var linesConsumed int64 | ||
|
|
||
| for { | ||
| select { | ||
| case dataOrErr, ok := <-dataErrChan: | ||
|
|
@@ -422,7 +426,13 @@ func handleChunksWithError( | |
| } | ||
| if len(dataOrErr.Data) > 0 { | ||
| chunk := *chunkSkel | ||
| if chunk.SourceMetadata != nil { | ||
| if cloned, ok := proto.Clone(chunk.SourceMetadata).(*source_metadatapb.MetaData); ok { | ||
| chunk.SourceMetadata = cloned | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure what this is doing, but for various personal reasons I'm behind on sleep 😹 so it's likely I'm missing something obvious |
||
| } | ||
| } | ||
| chunk.Data = dataOrErr.Data | ||
| linesConsumed = updateFilesystemLineMetadata(&chunk, linesConsumed) | ||
| if err := reporter.ChunkOk(ctx, chunk); err != nil { | ||
| return fmt.Errorf("error reporting chunk: %w", err) | ||
| } | ||
|
|
@@ -433,6 +443,38 @@ func handleChunksWithError( | |
| } | ||
| } | ||
|
|
||
| // updateFilesystemLineMetadata sets the 1-based starting line for filesystem chunks and | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Great comment, love it 🙌🏻 |
||
| // updates the running total of lines consumed so subsequent chunks can be | ||
| // correctly anchored. Only the unique portion of the chunk (excluding the peek | ||
| // overlap) contributes to the running count so that lines aren't double counted. | ||
| // | ||
| // This relies on HandleFile's default chunk reader, which emits chunks that | ||
| // contain DefaultChunkSize bytes of unique data followed by a DefaultPeekSize | ||
| // overlap with the next chunk. | ||
| func updateFilesystemLineMetadata(chunk *sources.Chunk, linesConsumed int64) int64 { | ||
| if chunk.SourceMetadata == nil { | ||
| return linesConsumed | ||
| } | ||
| fsMeta := chunk.SourceMetadata.GetFilesystem() | ||
| if fsMeta == nil { | ||
| return linesConsumed | ||
| } | ||
|
|
||
| fsMeta.Line = linesConsumed + 1 | ||
|
|
||
| data := chunk.Data | ||
| if len(data) == 0 { | ||
| return linesConsumed | ||
| } | ||
|
|
||
| uniqueLen := len(data) | ||
| if uniqueLen > sources.DefaultChunkSize { | ||
| uniqueLen = sources.DefaultChunkSize | ||
| } | ||
|
|
||
| return linesConsumed + int64(bytes.Count(data[:uniqueLen], []byte("\n"))) | ||
| } | ||
|
|
||
| // isFatal determines whether the given error is a fatal error that should | ||
| // terminate processing the current file, or a non-critical error that can be logged and ignored. | ||
| // "Fatal" errors include context cancellation, deadline exceeded, and the | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just `linesConsumed := 0` is OK here; this will be an `int`, which is 64-bit on all platforms we care about