diff --git a/dir.c b/dir.c index 026d8516a912af..37d3ba9aee3577 100644 --- a/dir.c +++ b/dir.c @@ -28,6 +28,7 @@ #include "ewah/ewok.h" #include "fsmonitor-ll.h" #include "read-cache-ll.h" +#include "trace.h" #include "setup.h" #include "sparse-index.h" #include "strbuf.h" @@ -1098,6 +1099,12 @@ static void do_invalidate_gitignore(struct untracked_cache_dir *dir) { int i; dir->valid = 0; + /* + * Clear the cached .gitignore content since the file may have + * changed. It will be re-read from disk on the next access. + */ + FREE_AND_NULL(dir->exclude_content); + dir->exclude_content_len = 0; for (size_t i = 0; i < dir->untracked_nr; i++) free(dir->untracked[i]); dir->untracked_nr = 0; @@ -1127,6 +1134,8 @@ static void invalidate_directory(struct untracked_cache *uc, uc->dir_invalidated++; dir->valid = 0; + FREE_AND_NULL(dir->exclude_content); + dir->exclude_content_len = 0; for (size_t i = 0; i < dir->untracked_nr; i++) free(dir->untracked[i]); dir->untracked_nr = 0; @@ -1145,10 +1154,15 @@ static void invalidate_directory(struct untracked_cache *uc, * If "oid_stat" is not NULL, compute oid of the exclude file and fill * stat data from disk (only valid if add_patterns returns zero). If * oid_stat.valid is non-zero, "oid_stat" must contain good value as input. + * + * If "content_out" and "content_len_out" are not NULL, store a copy of + * the raw file content for later caching (e.g. in untracked_cache_dir). + * The caller is responsible for freeing *content_out. 
*/ static int add_patterns(const char *fname, const char *base, int baselen, struct pattern_list *pl, struct index_state *istate, - unsigned flags, struct oid_stat *oid_stat) + unsigned flags, struct oid_stat *oid_stat, + char **content_out, size_t *content_len_out) { struct stat st; int r; @@ -1218,6 +1232,16 @@ static int add_patterns(const char *fname, const char *base, int baselen, return -1; } + /* + * If the caller wants to cache the raw content (for + * fsmonitor-backed reuse), store a copy before parsing + * modifies the buffer in-place. + */ + if (content_out && content_len_out) { + *content_out = xmemdupz(buf, size); + *content_len_out = size; + } + add_patterns_from_buffer(buf, size, base, baselen, pl); free(buf); return 0; @@ -1258,7 +1282,8 @@ int add_patterns_from_file_to_list(const char *fname, const char *base, struct index_state *istate, unsigned flags) { - return add_patterns(fname, base, baselen, pl, istate, flags, NULL); + return add_patterns(fname, base, baselen, pl, istate, flags, NULL, + NULL, NULL); } int add_patterns_from_blob_to_list( @@ -1315,7 +1340,7 @@ static void add_patterns_from_file_1(struct dir_struct *dir, const char *fname, if (!dir->untracked) dir->internal.unmanaged_exclude_files++; pl = add_pattern_list(dir, EXC_FILE, fname); - if (add_patterns(fname, "", 0, pl, NULL, 0, oid_stat) < 0) + if (add_patterns(fname, "", 0, pl, NULL, 0, oid_stat, NULL, NULL) < 0) die(_("cannot use %s as an exclude file"), fname); } @@ -1770,11 +1795,68 @@ static void prep_exclude(struct dir_struct *dir, strbuf_addbuf(&sb, &dir->internal.basebuf); strbuf_addstr(&sb, dir->exclude_per_dir); pl->src = strbuf_detach(&sb, NULL); - add_patterns(pl->src, pl->src, stk->baselen, pl, istate, - PATTERN_NOFOLLOW, - untracked ? &oid_stat : NULL); + + /* + * When fsmonitor is active and this directory is + * unchanged, check for cached .gitignore content + * from a previous load. 
This avoids re-reading the + * file from disk when we're building the exclude + * stack for an invalidated child directory. + */ + if (dir->untracked && + dir->untracked->use_fsmonitor && + untracked && untracked->valid && + untracked->exclude_content) { + char *buf_copy; + /* + * add_patterns_from_buffer() modifies the + * buffer in-place, so we must duplicate it. + */ + buf_copy = xmemdupz(untracked->exclude_content, + untracked->exclude_content_len); + add_patterns_from_buffer(buf_copy, + untracked->exclude_content_len, + pl->src, stk->baselen, pl); + free(buf_copy); + /* + * Trust the cached OID since fsmonitor + * guarantees the file hasn't changed. + */ + oidcpy(&oid_stat.oid, &untracked->exclude_oid); + dir->untracked->gitignore_cached++; + trace_printf_key(&trace_fsmonitor, + "prep_exclude: used cached " + ".gitignore for '%s'", + pl->src); + } else { + /* + * Read the .gitignore from disk. If the + * untracked cache is active, also cache the + * content for potential reuse when fsmonitor + * confirms the file hasn't changed. + */ + char *cached_content = NULL; + size_t cached_len = 0; + add_patterns(pl->src, pl->src, stk->baselen, + pl, istate, PATTERN_NOFOLLOW, + untracked ? &oid_stat : NULL, + untracked ? &cached_content : NULL, + untracked ? &cached_len : NULL); + if (untracked && cached_content) { + free(untracked->exclude_content); + untracked->exclude_content = cached_content; + untracked->exclude_content_len = cached_len; + } + } } /* + * With the fsmonitor optimization in valid_cached_dir(), the + * NEEDSWORK below is partially addressed: when the cache is + * fully valid (confirmed by fsmonitor), prep_exclude() is not + * called at all from valid_cached_dir(). It is only called + * here when building the exclude stack for an invalidated + * child directory, where the patterns ARE needed. 
+ * * NEEDSWORK: when untracked cache is enabled, prep_exclude() * will first be called in valid_cached_dir() then maybe many * times more in last_matching_pattern(). When the cache is @@ -2550,6 +2632,34 @@ static int valid_cached_dir(struct dir_struct *dir, if (untracked->check_only != !!check_only) return 0; + /* + * When fsmonitor is active and confirms this directory is + * unchanged, we can trust the cached exclude_oid without + * re-reading and re-hashing the .gitignore file from disk. + * The fsmonitor guarantees that if anything in this directory + * changed (including the .gitignore file), the directory would + * have been invalidated via untracked_cache_invalidate_trimmed_path(). + * + * This avoids the expensive prep_exclude() call which would + * open, read, and hash every .gitignore file along the path, + * only to confirm the OID hasn't changed. For repositories + * with many .gitignore files, this is a significant performance + * improvement. + * + * The exclude patterns will still be loaded lazily by + * prep_exclude() if they are actually needed later (e.g. when + * last_matching_pattern() is called for files in invalidated + * child directories). + */ + if (dir->untracked->use_fsmonitor && untracked->valid) { + dir->untracked->gitignore_skipped++; + trace_printf_key(&trace_fsmonitor, + "valid_cached_dir: skip prep_exclude for " + "fsmonitor-valid dir '%s'", + path->buf); + return 1; + } + /* * prep_exclude will be called eventually on this directory, * but it's called much later in last_matching_pattern(). 
We @@ -3130,6 +3240,12 @@ static void emit_traversal_statistics(struct dir_struct *dir, dir->untracked->dir_invalidated); trace2_data_intmax("read_directory", repo, "opendir", dir->untracked->dir_opened); + trace2_data_intmax("read_directory", repo, + "gitignore-skipped", + dir->untracked->gitignore_skipped); + trace2_data_intmax("read_directory", repo, + "gitignore-cached", + dir->untracked->gitignore_cached); } int read_directory(struct dir_struct *dir, struct index_state *istate, @@ -3732,6 +3848,7 @@ static void free_untracked(struct untracked_cache_dir *ucd) free(ucd->untracked[i]); free(ucd->untracked); free(ucd->dirs); + free(ucd->exclude_content); free(ucd); } diff --git a/dir.h b/dir.h index 20d4a078d61ef8..6cd33383fde5ce 100644 --- a/dir.h +++ b/dir.h @@ -184,6 +184,14 @@ struct untracked_cache_dir { unsigned int recurse : 1; /* null object ID means this directory does not have .gitignore */ struct object_id exclude_oid; + /* + * In-memory cache of .gitignore file content for fsmonitor + * optimization. When fsmonitor confirms a directory is unchanged, + * we can reuse this cached content instead of re-reading from disk. + * This field is NOT serialized to the index extension. + */ + char *exclude_content; + size_t exclude_content_len; char name[FLEX_ARRAY]; }; @@ -204,8 +212,17 @@ struct untracked_cache { int gitignore_invalidated; int dir_invalidated; int dir_opened; + int gitignore_skipped; /* prep_exclude() skipped via fsmonitor */ + int gitignore_cached; /* prep_exclude() used cached content */ /* fsmonitor invalidation data */ unsigned int use_fsmonitor : 1; + /* + * Set by refresh_fsmonitor(): cleared before results are + * applied, then set when a changed path's basename matches + * exclude_per_dir or when use_fsmonitor is cleared. + * NOTE(review): no reader of this flag is visible in this patch. 
+ */ + unsigned int gitignore_changed : 1; }; /** diff --git a/fsmonitor.c b/fsmonitor.c index d07dc18967ae33..aa3ecbe93f95b5 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -442,6 +442,25 @@ static void fsmonitor_refresh_callback(struct index_state *istate, char *name) "fsmonitor_refresh_callback '%s' (pos %d)", name, pos); + /* + * Record changes to the per-directory exclude file: if the + * basename of the reported path equals exclude_per_dir, set + * gitignore_changed on the untracked cache. + * NOTE(review): no code visible in this patch reads the flag, + * so valid_cached_dir() is not yet affected by it. + */ + if (istate->untracked && istate->untracked->exclude_per_dir) { + const char *base = strrchr(name, '/'); + const char *filename = base ? base + 1 : name; + if (!strcmp(filename, istate->untracked->exclude_per_dir)) { + istate->untracked->gitignore_changed = 1; + trace_printf_key(&trace_fsmonitor, + "fsmonitor_refresh_callback: " + ".gitignore changed '%s'", + name); + } + } + if (name[len - 1] == '/') nr_in_cone = handle_path_with_trailing_slash(istate, name, pos); else @@ -657,6 +676,14 @@ void refresh_fsmonitor(struct index_state *istate) */ trace2_region_enter("fsmonitor", "apply_results", istate->repo); + /* + * Reset the gitignore_changed flag before processing results. + * It will be set by fsmonitor_refresh_callback() if any + * .gitignore file appears in the changed path list. + */ + if (istate->untracked) + istate->untracked->gitignore_changed = 0; + if (query_success && !is_trivial) { /* * Mark all pathnames returned by the monitor as dirty. 
@@ -713,8 +740,10 @@ void refresh_fsmonitor(struct index_state *istate) if (is_cache_changed) istate->cache_changed |= FSMONITOR_CHANGED; - if (istate->untracked) + if (istate->untracked) { istate->untracked->use_fsmonitor = 0; + istate->untracked->gitignore_changed = 1; + } } trace2_region_leave("fsmonitor", "apply_results", istate->repo); diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index 7ee69ecdd4aa2c..4eda8099c0e7d3 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -477,4 +477,156 @@ test_expect_success 'status succeeds with sparse index' ' ) ' +test_expect_success UNTRACKED_CACHE 'fsmonitor skips .gitignore reading for valid dirs' ' + test_create_repo skip-gitignore && + ( + cd skip-gitignore && + + # Create a directory structure with multiple .gitignore files + mkdir -p dir1 dir2 dir3 && + : >tracked && + : >dir1/tracked && + : >dir2/tracked && + : >dir3/tracked && + echo "*.log" >.gitignore && + echo "*.tmp" >dir1/.gitignore && + echo "*.bak" >dir2/.gitignore && + echo "*.old" >dir3/.gitignore && + test-tool chmtime =-60 tracked dir1/tracked dir2/tracked dir3/tracked && + test-tool chmtime =-60 .gitignore dir1/.gitignore dir2/.gitignore dir3/.gitignore && + test-tool chmtime =-60 dir1 dir2 dir3 . 
&& + git add tracked dir1/tracked dir2/tracked dir3/tracked && + git add .gitignore dir1/.gitignore dir2/.gitignore dir3/.gitignore && + git commit -m "initial" && + + # Install a no-change fsmonitor hook + test_hook --setup --clobber fsmonitor-test <<-\EOF && + printf "last_update_token\0" + EOF + git config core.fsmonitor .git/hooks/fsmonitor-test && + git update-index --untracked-cache && + git update-index --fsmonitor && + + # First status populates the cache + git status && + + # Second status should use the cache + GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace-skip" \ + git status && + + # Verify the optimization is working: gitignore-skipped should be + # non-zero (directories whose prep_exclude was skipped thanks to + # fsmonitor confirming they are unchanged) + grep "gitignore-skipped" "$TRASH_DIRECTORY/trace-skip" >../trace-skip-lines && + # Check that the value after the colon is > 0 + grep "gitignore-skipped:[1-9]" ../trace-skip-lines + ) +' + +test_expect_success UNTRACKED_CACHE 'fsmonitor correctly invalidates on .gitignore change' ' + test_create_repo gitignore-invalidate && + ( + cd gitignore-invalidate && + + # Set up repo with .gitignore + mkdir -p dir1 && + : >tracked && + : >dir1/tracked && + echo "*.log" >.gitignore && + test-tool chmtime =-60 tracked dir1/tracked .gitignore && + test-tool chmtime =-60 dir1 . 
&& + git add tracked dir1/tracked .gitignore && + git commit -m "initial" && + + # Install fsmonitor hook that reports no changes initially + test_hook --setup --clobber fsmonitor-test <<-\EOF && + printf "last_update_token\0" + EOF + git config core.fsmonitor .git/hooks/fsmonitor-test && + git update-index --untracked-cache && + git update-index --fsmonitor && + + # Populate the cache + git status && + git status && + + # Create test files - one should be ignored, one should not + : >dir1/test.log && + : >dir1/test.txt && + + # Now install a hook that reports the .gitignore changed + # plus the new files + test_hook --clobber fsmonitor-test <<-\EOF && + printf "last_update_token\0" + printf ".gitignore\0" + printf "dir1/test.log\0" + printf "dir1/test.txt\0" + printf "dir1\0" + EOF + + # Status should correctly apply ignore rules even though + # the cache was invalidated + git status --porcelain >../actual-invalidate && + echo "?? dir1/test.txt" >../expect-invalidate && + test_cmp ../expect-invalidate ../actual-invalidate + ) +' + +test_expect_success UNTRACKED_CACHE 'fsmonitor status correct with many .gitignore files' ' + test_create_repo many-gitignores && + ( + cd many-gitignores && + + # Create a deeper directory tree with .gitignore at each level + mkdir -p a/b/c/d && + : >tracked && + : >a/tracked && + : >a/b/tracked && + : >a/b/c/tracked && + : >a/b/c/d/tracked && + echo "*.root-ignored" >.gitignore && + echo "*.a-ignored" >a/.gitignore && + echo "*.b-ignored" >a/b/.gitignore && + echo "*.c-ignored" >a/b/c/.gitignore && + echo "*.d-ignored" >a/b/c/d/.gitignore && + git add -A && + git commit -m "initial" && + + # Install no-change fsmonitor hook + test_hook --setup --clobber fsmonitor-test <<-\EOF && + printf "last_update_token\0" + EOF + git config core.fsmonitor .git/hooks/fsmonitor-test && + git update-index --untracked-cache && + git update-index --fsmonitor && + + # Populate and warm the cache + git status && + git status && + + # Add files at the 
deepest level - some ignored, some not + : >a/b/c/d/file.root-ignored && + : >a/b/c/d/file.d-ignored && + : >a/b/c/d/file.txt && + : >a/b/c/d/file.a-ignored && + + # Report only the deepest dir as changed + test_hook --clobber fsmonitor-test <<-\EOF && + printf "last_update_token\0" + printf "a/b/c/d/file.root-ignored\0" + printf "a/b/c/d/file.d-ignored\0" + printf "a/b/c/d/file.txt\0" + printf "a/b/c/d/file.a-ignored\0" + printf "a/b/c/d\0" + EOF + + # Status should correctly evaluate all parent .gitignore + # rules and only show the non-ignored file. + # Ignore rules cascade: root, a/, a/b/, a/b/c/, a/b/c/d/ + git status --porcelain >../actual-many && + echo "?? a/b/c/d/file.txt" >../expect-many && + test_cmp ../expect-many ../actual-many + ) +' + test_done