Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion gitoxide-core/src/repository/merge_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ pub fn merge_base(
let first_id = repo.rev_parse_single(first.as_str())?;
let other_ids: Vec<_> = others
.iter()
.cloned()
.map(|other| repo.rev_parse_single(other.as_str()).map(gix::Id::detach))
.collect::<Result<_, _>>()?;

Expand Down
3 changes: 2 additions & 1 deletion gix-diff/src/blob/unified_diff/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ where
}

impl DiffLineKind {
const fn to_prefix(self) -> char {
/// Returns a one-character representation for use in unified diffs.
pub const fn to_prefix(self) -> char {
match self {
DiffLineKind::Context => ' ',
DiffLineKind::Add => '+',
Expand Down
26 changes: 26 additions & 0 deletions gix-diff/tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
## How to run diff-slider tests

The idea is to use https://github.com/mhagger/diff-slider-tools to create slider information for use to generate a test which invokes our own implementation and compares it to Git itself.
Follow these instructions to set it up.

1. DIFF_SLIDER_TOOLS=/your/anticipated/path/to/diff-slider-tools
2. `git clone https://github.com/mhagger/diff-slider-tools $DIFF_SLIDER_TOOLS`
3. `pushd $DIFF_SLIDER_TOOLS`
4. Follow [these instructions](https://github.com/mhagger/diff-slider-tools/blob/b59ed13d7a2a6cfe14a8f79d434b6221cc8b04dd/README.md?plain=1#L122-L146) to generate a file containing the slider information. Be sure to set the `repo` variable as it's used in later script invocations.
- Note that `get-corpus` must be run with `./get-corpus`.
- You can use an existing repository, for instance via `repo=git-human`, so there is no need to find your own repository to test.
- The script suite is very slow, and it's recommended to only set a range of commits, or use a small repository for testing.

Finally, run the `internal-tools` program to turn that file into a fixture called `make_diff_for_sliders_repo.sh`.

```shell
# run inside `gitoxide`
popd
cargo run --package internal-tools -- \
create-diff-cases \
--sliders-file $DIFF_SLIDER_TOOLS/corpus/$repo.sliders \
--worktree-dir $DIFF_SLIDER_TOOLS/corpus/$repo.git/ \
--destination-dir gix-diff/tests/fixtures/
```

Finally, run `cargo test -p gix-diff-tests sliders -- --nocapture` to execute the actual tests to compare.
1 change: 1 addition & 0 deletions gix-diff/tests/diff/blob/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub(crate) mod pipeline;
mod platform;
mod slider;
mod unified_diff;
253 changes: 253 additions & 0 deletions gix-diff/tests/diff/blob/slider.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
use gix_diff::blob::intern::TokenSource;
use gix_diff::blob::unified_diff::ContextSize;
use gix_diff::blob::{Algorithm, UnifiedDiff};
use gix_testtools::bstr::{BString, ByteVec};

#[test]
fn baseline() -> gix_testtools::Result {
let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?;
let asset_dir = worktree_path.join("assets");

let dir = std::fs::read_dir(&worktree_path)?;

let mut count = 0;
for entry in dir {
let entry = entry?;
let file_name = entry.file_name().into_string().expect("to be string");

if !file_name.ends_with(".baseline") {
continue;
}
count += 1;

let parts: Vec<_> = file_name.split('.').collect();
let [name, algorithm, ..] = parts[..] else {
unreachable!()
};
let algorithm = match algorithm {
"myers" => Algorithm::Myers,
"histogram" => Algorithm::Histogram,
_ => unreachable!(),
};

let parts: Vec<_> = name.split('-').collect();
let [old_blob_id, new_blob_id] = parts[..] else {
unreachable!();
};

let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?;
let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?;

let interner = gix_diff::blob::intern::InternedInput::new(
tokens_for_diffing(old_data.as_slice()),
tokens_for_diffing(new_data.as_slice()),
);

let actual = gix_diff::blob::diff(
algorithm,
&interner,
UnifiedDiff::new(
&interner,
baseline::DiffHunkRecorder::new(),
ContextSize::symmetrical(3),
),
)?;

let baseline_path = worktree_path.join(file_name);
let baseline = std::fs::read(baseline_path)?;
let baseline = baseline::Baseline::new(&baseline);

let actual = actual
.iter()
.fold(BString::default(), |mut acc, diff_hunk| {
acc.push_str(diff_hunk.header.to_string().as_str());
acc.push(b'\n');

acc.extend_from_slice(&diff_hunk.lines);

acc
})
.to_string();

let baseline = baseline
.fold(BString::default(), |mut acc, diff_hunk| {
acc.push_str(diff_hunk.header.to_string().as_str());
acc.push(b'\n');

acc.extend_from_slice(&diff_hunk.lines);

acc
})
.to_string();

pretty_assertions::assert_eq!(actual, baseline);
}

if count == 0 {
eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions");
}

Ok(())
}

fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
gix_diff::blob::sources::byte_lines(data)
}

mod baseline {
use gix_object::bstr::ByteSlice;
use std::iter::Peekable;

use gix_diff::blob::unified_diff::{ConsumeHunk, HunkHeader};
use gix_object::bstr::{self, BString};

static START_OF_HEADER: &[u8; 4] = b"@@ -";

#[derive(Debug, PartialEq)]
pub struct DiffHunk {
pub header: HunkHeader,
pub lines: BString,
}

pub struct DiffHunkRecorder {
inner: Vec<DiffHunk>,
}

impl DiffHunkRecorder {
pub fn new() -> Self {
Self { inner: Vec::new() }
}
}

impl ConsumeHunk for DiffHunkRecorder {
type Out = Vec<DiffHunk>;

fn consume_hunk(
&mut self,
header: HunkHeader,
lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])],
) -> std::io::Result<()> {
let mut buf = Vec::new();

for &(kind, line) in lines {
buf.push(kind.to_prefix() as u8);
buf.extend_from_slice(line);
buf.push(b'\n');
}

let diff_hunk = DiffHunk {
header,
lines: buf.into(),
};

self.inner.push(diff_hunk);

Ok(())
}

fn finish(self) -> Self::Out {
self.inner
}
}

type Lines<'a> = Peekable<bstr::Lines<'a>>;

pub struct Baseline<'a> {
lines: Lines<'a>,
}

impl<'a> Baseline<'a> {
pub fn new(content: &'a [u8]) -> Baseline<'a> {
let mut lines = content.lines().peekable();
skip_header(&mut lines);
Baseline { lines }
}
}

impl Iterator for Baseline<'_> {
type Item = DiffHunk;

fn next(&mut self) -> Option<Self::Item> {
let mut hunk_header = None;
let mut hunk_lines = Vec::new();

while let Some(line) = self.lines.next() {
if line.starts_with(START_OF_HEADER) {
assert!(hunk_header.is_none(), "should not overwrite existing hunk_header");
hunk_header = parse_hunk_header(line).ok();

continue;
}

match line[0] {
b' ' | b'+' | b'-' => {
hunk_lines.extend_from_slice(line);
hunk_lines.push(b'\n');
}
_ => unreachable!("BUG: expecting unified diff format"),
}

match self.lines.peek() {
Some(next_line) if next_line.starts_with(START_OF_HEADER) => break,
None => break,
_ => {}
}
}

hunk_header.map(|hunk_header| DiffHunk {
header: hunk_header,
lines: hunk_lines.into(),
})
}
}

fn skip_header(lines: &mut Lines) {
// diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
// index ccccccc..ddddddd 100644
// --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
// +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb

let line = lines.next().expect("line to be present");
assert!(line.starts_with(b"diff --git "));

let line = lines.next().expect("line to be present");
assert!(line.starts_with(b"index "));

let line = lines.next().expect("line to be present");
assert!(line.starts_with(b"--- "));

let line = lines.next().expect("line to be present");
assert!(line.starts_with(b"+++ "));
}

/// Parse diff hunk headers that conform to the unified diff hunk header format.
///
/// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This
/// allows us to split the input on ` ` and `,` only.
///
/// @@ -18,6 +18,7 @@ abc def ghi
/// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@
fn parse_hunk_header(line: &[u8]) -> gix_testtools::Result<HunkHeader> {
let line = line.strip_prefix(START_OF_HEADER).unwrap();

let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect();
let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else {
unreachable!()
};

Ok(HunkHeader {
before_hunk_start: parse_number(before_hunk_start),
before_hunk_len: parse_number(before_hunk_len),
after_hunk_start: parse_number(after_hunk_start),
after_hunk_len: parse_number(after_hunk_len),
})
}

fn parse_number(bytes: &[u8]) -> u32 {
bytes
.to_str()
.expect("to be a valid UTF-8 string")
.parse::<u32>()
.expect("to be a number")
}
}
2 changes: 2 additions & 0 deletions gix-diff/tests/fixtures/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# The slider assets that are needed for that slider fixture script to work and build the fixture.
/assets/
2 changes: 2 additions & 0 deletions gix-diff/tests/fixtures/generated-archives/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# The auto-generated sliders fixtures. For now it's experimental, but we may store it later once it's all working.
/make_diff_for_sliders_repo.tar
8 changes: 8 additions & 0 deletions gix-diff/tests/fixtures/make_diff_for_sliders_repo.sh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love to keep this info, but it feels it would be more accessible in gix-diff/README.md (instead of a seemingly fake shell script).
Or am I missing something, maybe the placeholder is needed for something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And in that file, could you provide a snipped, behind <details><summary>… that I can use as a diff slider example?
That way I don't have to repeat all these steps and… run python… (yes, I am snobby ;)), just to try it out.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, make_diff_for_sliders_repo.sh needs to be present for this line not to fail: https://github.com/cruessler/gitoxide/blob/dd5127f54ce4e055e6a87719b4962482cd82090d/gix-diff/tests/diff/blob/slider.rs#L257. I just deleted the script locally which made the associated test fail with a file not found message.

What do you mean by “diff slider example”? If you want the test to run on one single diff in order to compare the git baseline against gix-diff, you would need two .blob files in assets as well as the corresponding code in the script to generate the .baseline files (one for each of the 2 diff algorithms currently being tested). While that, in an of itself, would be easy to add, the resulting test would most likely fail as gix-diff doesn’t yet match the git baseline in most cases as far as I can tell. Which is why I didn’t even try in the first place. 😄

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apart from that, I was also wondering where the best place for the info that currently lives in the script is. Feel free to move it to a more appropriate place, the README.md seems like a good choice to me!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This clearly shows that I don't fully understand the setup yet. What I read made me think the slider file is all that's needed and the test only depends on that, but that's not the case as it doesn't include the actual blobs. It just refers to them by ID.

So I will be back with this tomorrow probably and spend the time that I probably tried to safe today 😅.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That kind of feedback is already very valuable! 😄 I’m happy to provide more context, hoping to make this a bit more self-contained.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually think you already did everything you could and I will use my ignorance and naiveté towards that matter to make it work for other readers with a similar level of ignorance and naiveté :D. So I think I am predestined to do that 😁.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
set -eu -o pipefail

# This script is a placeholder for a script generated by the
# `create-diff-cases` subcommand of `internal-tools`. The placeholder does
# nothing except making the associated test trivially pass.
#
# See the `gix-diff/tests/README.md` file for more information.
30 changes: 30 additions & 0 deletions gix/src/repository/location.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,36 @@ impl crate::Repository {
self.work_tree.as_deref()
}

/// Forcefully set the given `workdir` to be the worktree of this repository, *in memory*,
/// no matter if it had one or not, or unset it with `None`.
/// Return the previous working directory if one existed.
///
/// Fail if the `workdir`, if not `None`, isn't accessible or isn't a directory.
/// No change is performed on error.
///
/// ### About Worktrees
///
/// * When setting a main worktree to a linked worktree directory, this repository instance
/// will still claim that it is the [main worktree](crate::Worktree::is_main()) as that depends
/// on the `git_dir`, not the worktree dir.
/// * When setting a linked worktree to a main worktree directory, this repository instance
/// will still claim that it is *not* a [main worktree](crate::Worktree::is_main()) as that depends
/// on the `git_dir`, not the worktree dir.
#[doc(alias = "git2")]
pub fn set_workdir(&mut self, workdir: impl Into<Option<PathBuf>>) -> Result<Option<PathBuf>, std::io::Error> {
let workdir = workdir.into();
Ok(match workdir {
None => self.work_tree.take(),
Some(new_workdir) => {
_ = std::fs::read_dir(&new_workdir)?;

let old = self.work_tree.take();
self.work_tree = Some(new_workdir);
old
}
})
}

/// Return the work tree containing all checked out files, if there is one.
pub fn workdir(&self) -> Option<&std::path::Path> {
self.work_tree.as_deref()
Expand Down
5 changes: 3 additions & 2 deletions gix/src/repository/worktree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ impl crate::Repository {
res.sort_by(|a, b| a.git_dir.cmp(&b.git_dir));
Ok(res)
}

/// Return the repository owning the main worktree, typically from a linked worktree.
///
/// Note that it might be the one that is currently open if this repository doesn't point to a linked worktree.
Expand All @@ -41,7 +42,7 @@ impl crate::Repository {

/// Return the currently set worktree if there is one, acting as platform providing a validated worktree base path.
///
/// Note that there would be `None` if this repository is `bare` and the parent [`Repository`][crate::Repository] was instantiated without
/// Note that there would be `None` if this repository is `bare` and the parent [`Repository`](crate::Repository) was instantiated without
/// registered worktree in the current working dir, even if no `.git` file or directory exists.
/// It's merely based on configuration, see [Worktree::dot_git_exists()] for a way to perform more validation.
pub fn worktree(&self) -> Option<Worktree<'_>> {
Expand All @@ -50,7 +51,7 @@ impl crate::Repository {

/// Return true if this repository is bare, and has no main work tree.
///
/// This is not to be confused with the [`worktree()`][crate::Repository::worktree()] worktree, which may exists if this instance
/// This is not to be confused with the [`worktree()`](crate::Repository::worktree()) method, which may exist if this instance
/// was opened in a worktree that was created separately.
pub fn is_bare(&self) -> bool {
self.config.is_bare && self.workdir().is_none()
Expand Down
Loading
Loading