Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package handlers
import (
"archive/zip"
"bufio"
"bytes"
"context"
"errors"
"fmt"
Expand All @@ -11,6 +12,7 @@ import (

"github.com/gabriel-vasile/mimetype"
"github.com/mholt/archives"
"google.golang.org/protobuf/proto"

logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/feature"
Expand Down Expand Up @@ -405,6 +407,8 @@ func handleChunksWithError(
chunkSkel *sources.Chunk,
reporter sources.ChunkReporter,
) error {
var linesConsumed int64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just linesConsumed := 0 is OK here; this will be an int which is 64-bit on all platforms we care about


for {
select {
case dataOrErr, ok := <-dataErrChan:
Expand All @@ -422,7 +426,13 @@ func handleChunksWithError(
}
if len(dataOrErr.Data) > 0 {
chunk := *chunkSkel
if chunk.SourceMetadata != nil {
if cloned, ok := proto.Clone(chunk.SourceMetadata).(*source_metadatapb.MetaData); ok {
chunk.SourceMetadata = cloned
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what this is doing, but for various personal reasons I'm behind on sleep 😹 so it's likely I'm missing something obvious

}
}
chunk.Data = dataOrErr.Data
linesConsumed = updateFilesystemLineMetadata(&chunk, linesConsumed)
if err := reporter.ChunkOk(ctx, chunk); err != nil {
return fmt.Errorf("error reporting chunk: %w", err)
}
Expand All @@ -433,6 +443,38 @@ func handleChunksWithError(
}
}

// updateFilesystemLineMetadata sets the 1-based starting line for filesystem chunks and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great comment, love it 🙌🏻

// updates the running total of lines consumed so subsequent chunks can be
// correctly anchored. Only the unique portion of the chunk (excluding the peek
// overlap) contributes to the running count so that lines aren't double counted.
//
// This relies on HandleFile's default chunk reader, which emits chunks that
// contain DefaultChunkSize bytes of unique data followed by a DefaultPeekSize
// overlap with the next chunk.
func updateFilesystemLineMetadata(chunk *sources.Chunk, linesConsumed int64) int64 {
if chunk.SourceMetadata == nil {
return linesConsumed
}
fsMeta := chunk.SourceMetadata.GetFilesystem()
if fsMeta == nil {
return linesConsumed
}

fsMeta.Line = linesConsumed + 1

data := chunk.Data
if len(data) == 0 {
return linesConsumed
}

uniqueLen := len(data)
if uniqueLen > sources.DefaultChunkSize {
uniqueLen = sources.DefaultChunkSize
}

return linesConsumed + int64(bytes.Count(data[:uniqueLen], []byte("\n")))
}

// isFatal determines whether the given error is a fatal error that should
// terminate processing the current file, or a non-critical error that can be logged and ignored.
// "Fatal" errors include context cancellation, deadline exceeded, and the
Expand Down
50 changes: 50 additions & 0 deletions pkg/handlers/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@ import (
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
diskbufferreader "github.com/trufflesecurity/disk-buffer-reader"

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

Expand Down Expand Up @@ -153,6 +156,53 @@ func BenchmarkHandleFile(b *testing.B) {
}
}

// TestHandleChunksWithErrorSetsFilesystemLine verifies that filesystem chunks
// emitted by handleChunksWithError are anchored at the correct 1-based line:
// the first chunk starts at line 1, and the second chunk starts just past the
// newlines contained in the first chunk's unique (non-peek) portion.
func TestHandleChunksWithErrorSetsFilesystemLine(t *testing.T) {
	skel := &sources.Chunk{
		SourceType: sourcespb.SourceType_SOURCE_TYPE_FILESYSTEM,
		SourceMetadata: &source_metadatapb.MetaData{
			Data: &source_metadatapb.MetaData_Filesystem{
				Filesystem: &source_metadatapb.Filesystem{File: "test.txt"},
			},
		},
	}

	uniqueSize := sources.DefaultChunkSize
	overlapSize := sources.DefaultPeekSize

	// First chunk: a full unique section of "a\n" pairs followed by a peek
	// overlap of "p\n" pairs. Second chunk: a short tail of "b\n" pairs.
	first := append(
		bytes.Repeat([]byte("a\n"), uniqueSize/2),
		bytes.Repeat([]byte("p\n"), overlapSize/2)...,
	)
	second := bytes.Repeat([]byte("b\n"), 10)

	input := make(chan DataOrErr, 2)
	input <- DataOrErr{Data: first}
	input <- DataOrErr{Data: second}
	close(input)

	out := make(chan *sources.Chunk, 2)
	require.NoError(t, handleChunksWithError(context.Background(), input, skel, sources.ChanReporter{Ch: out}))

	close(out)
	var got []*sources.Chunk
	for c := range out {
		got = append(got, c)
	}
	require.Len(t, got, 2)

	// The first chunk always starts at line 1.
	metaOne := got[0].SourceMetadata.GetFilesystem()
	require.NotNil(t, metaOne)
	require.Equal(t, int64(1), metaOne.GetLine())

	// The second chunk starts one past the newline count of the first
	// chunk's unique portion (the peek overlap is excluded).
	wantLines := int64(bytes.Count(first[:uniqueSize], []byte("\n")))
	metaTwo := got[1].SourceMetadata.GetFilesystem()
	require.NotNil(t, metaTwo)
	require.Equal(t, wantLines+1, metaTwo.GetLine())
}

func TestSkipArchive(t *testing.T) {
file, err := os.Open("testdata/test.tgz")
assert.Nil(t, err)
Expand Down