Add support for delta reuse by kempniu · Pull Request #7040 · libgit2/libgit2 · GitHub
[go: up one dir, main page]

Skip to content

Add support for delta reuse #7040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
8000
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
pack: implement loading reverse index from disk
The pack file index enables quickly mapping OIDs to entry offsets, but
not the other way round.  Furthermore, pack file entries do not store
the size of the compressed object representation.  Being able to both
map an entry offset back to its OID and determine the compressed size of
an entry quickly is necessary for robust extraction of compressed pack
file data at arbitrary offsets.

This is where the pack file reverse index comes in handy.  It is an
array of index entry numbers ordered by pack file offset, which enables
quickly mapping entry offsets to OIDs (using binary search) and
determining compressed object representation size (by checking the
offset of the next entry in the pack file).

Implement loading reverse index files ("pack-*.rev") in a similar way as
for index files ("pack-*.idx").  Perform some basic sanity checks on
reverse index contents.
  • Loading branch information
kempniu committed Feb 16, 2025
commit b97a2b566cebaed9e2d6f32acf12698f342a888c
96 changes: 96 additions & 0 deletions src/libgit2/pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,101 @@ static int pack_index_open_locked(struct git_pack_file *p)
return error;
}

/*
 * Release the memory map backing the pack's reverse index, if one is
 * currently mapped, and mark the reverse index as not loaded.
 */
static void pack_revindex_free(struct git_pack_file *p)
{
	/* Nothing mapped: nothing to release. */
	if (!p->revindex_map.data)
		return;

	git_futils_mmap_free(&p->revindex_map);

	/* A NULL data pointer is what the open path tests for "not loaded". */
	p->revindex_map.data = NULL;
}

/* Run with the packfile lock held */
static int pack_revindex_check_locked(const char *path, struct git_pack_file *p)
{
struct git_pack_ridx_header *hdr;
uint32_t version, i, *revindex;
size_t ridx_size;
struct stat st;
int error;

/* TODO: properly open the file without access time using O_NOATIME */
git_file fd = git_futils_open_ro(path);
if (fd < 0)
return fd;

if (p_fstat(fd, &st) < 0) {
p_close(fd);
git_error_set(GIT_ERROR_OS, "unable to stat pack reverse index '%s'", path);
return -1;
}

if (!S_ISREG(st.st_mode) ||
!git__is_sizet(st.st_size) ||
(ridx_size = (size_t)st.st_size) != (size_t)(sizeof(struct git_pack_ridx_header) + (4 * p->num_objects) + (p->oid_size * 2))) {
p_close(fd);
git_error_set(GIT_ERROR_ODB, "invalid pack reverse index '%s'", path);
return -1;
}

error = git_futils_mmap_ro(&p->revindex_map, fd, 0, ridx_size);

p_close(fd);

if (error < 0)
return error;

hdr = p->revindex_map.data;

if (hdr->ridx_signature != htonl(PACK_RIDX_SIGNATURE)) {
return packfile_error("invalid reverse index signature");
}

version = ntohl(hdr->ridx_version);

if (version < 1 || version > 1) {
git_futils_mmap_free(&p->revindex_map);
return packfile_error("unsupported reverse index version");
}

if (ntohl(hdr->ridx_oid_type) != p->oid_type) {
git_futils_mmap_free(&p->revindex_map);
return packfile_error("reverse index hash function mismatch");
}

revindex = (uint32_t *)(hdr + 1);

for (i = 0; i < p->num_objects; i++) {
if (ntohl(revindex[i]) > p->num_objects) {
git_futils_mmap_free(&p->revindex_map);
return packfile_error("invalid reverse index entry");
}
}

return 0;
}

/*
 * Make sure the reverse index ("pack-*.rev") of `p` is mapped and
 * validated.  Does nothing when it is already mapped.
 *
 * Returns 0 on success or a negative error code.
 *
 * Run with the packfile lock held.
 */
static int pack_revindex_open_locked(struct git_pack_file *p)
{
	/*
	 * NOTE(review): rev_path is handed to pack_get_suffixed_file_path()
	 * uninitialized; presumably that helper initializes it - confirm.
	 */
	git_str rev_path;
	int rc;

	/* Already loaded. */
	if (p->revindex_map.data != NULL)
		return 0;

	/* The forward index has to be open before the reverse index is checked. */
	if (!p->index_map.data) {
		rc = pack_index_open_locked(p);
		if (rc < 0)
			return rc;
	}

	rc = pack_get_suffixed_file_path(&rev_path, p, ".rev");
	if (rc < 0)
		return rc;

	rc = pack_revindex_check_locked(rev_path.ptr, p);
	git_str_dispose(&rev_path);

	return rc;
}

static unsigned char *pack_window_open(
struct git_pack_file *p,
git_mwindow **w_cursor,
Expand Down Expand Up @@ -1080,6 +1175,7 @@ void git_packfile_free(struct git_pack_file *p, bool unlink_packfile)
p_unlink(p->pack_name);

pack_index_free(p);
pack_revindex_free(p);

git__free(p->bad_object_ids);

Expand Down
11 changes: 10 additions & 1 deletion src/libgit2/pack.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ struct git_pack_idx_header {
uint32_t idx_version;
};

/* Magic number expected in the first header field of a pack reverse index file. */
#define PACK_RIDX_SIGNATURE 0x52494458 /* "RIDX" */

/*
 * Fixed-size header found at the very start of a pack reverse index
 * ("pack-*.rev") file.  The loader converts every field with
 * htonl()/ntohl(), i.e. the fields are in network byte order on disk.
 */
struct git_pack_ridx_header {
/* Must equal PACK_RIDX_SIGNATURE. */
uint32_t ridx_signature;
/* Format version; the loader only accepts version 1. */
uint32_t ridx_version;
/* Hash function identifier; must match the oid type of the pack. */
uint32_t ridx_oid_type;
};

typedef struct git_pack_cache_entry {
size_t last_usage; /* enough? */
git_atomic32 refcount;
Expand Down Expand Up @@ -108,7 +116,8 @@ typedef struct {
struct git_pack_file {
git_mwindow_file mwf;
git_map index_map;
git_mutex lock; /* protect updates to index_map */
git_map revindex_map;
git_mutex lock; /* protect updates to index_map & revindex_map */
git_atomic32 refcount;

uint32_t num_objects;
Expand Down
0