8000 Rollback Using Temp Files & Fix Atomic Writes (#1302) · nginx/agent@86fe87e · GitHub
[go: up one dir, main page]

Skip to content

Commit 86fe87e

Browse files
authored
Rollback Using Temp Files & Fix Atomic Writes (#1302)
1 parent c904afa commit 86fe87e

16 files changed

+444
-300
lines changed

internal/bus/topics.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ const (
1919
ConnectionResetTopic = "connection-reset"
2020
ConfigApplyRequestTopic = "config-apply-request"
2121
WriteConfigSuccessfulTopic = "write-config-successful"
22-
ConfigApplySuccessfulTopic = "config-apply-successful"
22+
ReloadSuccessfulTopic = "reload-successful"
23+
EnableWatchersTopic = "enable-watchers"
2324
ConfigApplyFailedTopic = "config-apply-failed"
2425
ConfigApplyCompleteTopic = "config-apply-complete"
2526
RollbackWriteTopic = "rollback-write"

internal/file/file_manager_service.go

Lines changed: 119 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ type (
7171
fileToUpdate *mpi.File,
7272
) error
7373
SetIsConnected(isConnected bool)
74-
MoveFilesFromTempDirectory(ctx context.Context, fileAction *model.FileCache, tempDir string) error
74+
RenameFile(ctx context.Context, hash, fileName, tempDir string) error
7575
UpdateClient(ctx context.Context, fileServiceClient mpi.FileServiceClient)
7676
}
7777

@@ -80,14 +80,15 @@ type (
8080
err error)
8181
Rollback(ctx context.Context, instanceID string) error
8282
ClearCache()
83+
SetConfigPath(configPath string)
8384
ConfigUpload(ctx context.Context, configUploadRequest *mpi.ConfigUploadRequest) error
8485
ConfigUpdate(ctx context.Context, nginxConfigContext *model.NginxConfigContext)
8586
UpdateCurrentFilesOnDisk(ctx context.Context, updateFiles map[string]*mpi.File, referenced bool) error
8687
DetermineFileActions(
8788
ctx context.Context,
8889
currentFiles map[string]*mpi.File,
8990
modifiedFiles map[string]*model.FileCache,
90-
) (map[string]*model.FileCache, map[string][]byte, error)
91+
) (map[string]*model.FileCache, error)
9192
IsConnected() bool
9293
SetIsConnected(isConnected bool)
9394
ResetClient(ctx context.Context, fileServiceClient mpi.FileServiceClient)
@@ -101,12 +102,13 @@ type FileManagerService struct {
101102
fileServiceOperator fileServiceOperatorInterface
102103
// map of files and the actions performed on them during config apply
103104
fileActions map[string]*model.FileCache // key is file path
104-
// map of the contents of files which have been updated or deleted during config apply, used during rollback
105-
rollbackFileContents map[string][]byte // key is file path
106105
// map of the files currently on disk, used to determine the file action during config apply
107106
currentFilesOnDisk map[string]*mpi.File // key is file path
108107
previousManifestFiles map[string]*model.ManifestFile
109108
manifestFilePath string
109+
tempConfigDir string
110+
tempRollbackDir string
111+
configPath string
110112
rollbackManifest bool
111113
filesMutex sync.RWMutex
112114
}
@@ -119,15 +121,19 @@ func NewFileManagerService(fileServiceClient mpi.FileServiceClient, agentConfig
119121
fileOperator: NewFileOperator(manifestLock),
120122
fileServiceOperator: NewFileServiceOperator(agentConfig, fileServiceClient, manifestLock),
121123
fileActions: make(map[string]*model.FileCache),
122-
rollbackFileContents: make(map[string][]byte),
123124
currentFilesOnDisk: make(map[string]*mpi.File),
124125
previousManifestFiles: make(map[string]*model.ManifestFile),
125126
rollbackManifest: true,
126127
manifestFilePath: agentConfig.LibDir + "/manifest.json",
128+
configPath: "/etc/nginx/",
127129
manifestLock: manifestLock,
128130
}
129131
}
130132

133+
func (fms *FileManagerService) SetConfigPath(configPath string) {
134+
fms.configPath = filepath.Dir(configPath)
135+
}
136+
131137
func (fms *FileManagerService) ResetClient(ctx context.Context, fileServiceClient mpi.FileServiceClient) {
132138
fms.fileServiceOperator.UpdateClient(ctx, fileServiceClient)
133139
slog.DebugContext(ctx, "File manager service reset client successfully")
@@ -144,6 +150,9 @@ func (fms *FileManagerService) SetIsConnected(isConnected bool) {
144150
func (fms *FileManagerService) ConfigApply(ctx context.Context,
145151
configApplyRequest *mpi.ConfigApplyRequest,
146152
) (status model.WriteStatus, err error) {
153+
var configTempErr error
154+
var rollbackTempErr error
155+
147156
fms.rollbackManifest = true
148157
fileOverview := configApplyRequest.GetOverview()
149158

@@ -156,7 +165,7 @@ func (fms *FileManagerService) ConfigApply(ctx context.Context,
156165
return model.Error, allowedErr
157166
}
158167

159-
diffFiles, fileContent, compareErr := fms.DetermineFileActions(
168+
diffFiles, compareErr := fms.DetermineFileActions(
160169
ctx,
161170
fms.currentFilesOnDisk,
162171
ConvertToMapOfFileCache(fileOverview.GetFiles()),
@@ -170,15 +179,24 @@ func (fms *FileManagerService) ConfigApply(ctx context.Context,
170179
return model.NoChange, nil
171180
}
172181

173-
fms.rollbackFileContents = fileContent
174182
fms.fileActions = diffFiles
175183

176-
tempDir, tempDirError := fms.createTempConfigDirectory(ctx)
177-
if tempDirError != nil {
178-
return model.Error, tempDirError
184+
fms.tempConfigDir, configTempErr = fms.createTempConfigDirectory("config")
185+
if configTempErr != nil {
186+
return model.Error, configTempErr
187+
}
188+
189+
fms.tempRollbackDir, rollbackTempErr = fms.createTempConfigDirectory("rollback")
190+
if rollbackTempErr != nil {
191+
return model.Error, rollbackTempErr
192+
}
193+
194+
rollbackTempFilesErr := fms.backupFiles(ctx)
195+
if rollbackTempFilesErr != nil {
196+
return model.Error, rollbackTempFilesErr
179197
}
180198

181-
fileErr := fms.executeFileActions(ctx, tempDir)
199+
fileErr := fms.executeFileActions(ctx)
182200
if fileErr != nil {
183201
fms.rollbackManifest = false
184202
return model.RollbackRequired, fileErr
@@ -194,12 +212,22 @@ func (fms *FileManagerService) ConfigApply(ctx context.Context,
194212
}
195213

196214
func (fms *FileManagerService) ClearCache() {
197-
clear(fms.rollbackFileContents)
215+
slog.Debug("Clearing cache and temp files after config apply")
198216
clear(fms.fileActions)
199217
clear(fms.previousManifestFiles)
218+
219+
configErr := os.RemoveAll(fms.tempConfigDir)
220+
if configErr != nil {
221+
slog.Error("Error removing temp config directory", "path", fms.tempConfigDir, "err", configErr)
222+
}
223+
224+
rollbackErr := os.RemoveAll(fms.tempRollbackDir)
225+
if rollbackErr != nil {
226+
slog.Error("Error removing temp rollback directory", "path", fms.tempRollbackDir, "err", rollbackErr)
227+
}
200228
}
201229

202-
//nolint:revive // cognitive-complexity of 13 max is 12, loop is needed cant be broken up
230+
//nolint:revive,cyclop // cognitive-complexity of 13 max is 12, loop is needed cant be broken up
203231
func (fms *FileManagerService) Rollback(ctx context.Context, instanceID string) error {
204232
slog.InfoContext(ctx, "Rolling back config for instance", "instance_id", instanceID)
205233

@@ -217,16 +245,14 @@ func (fms *FileManagerService) Rollback(ctx context.Context, instanceID string)
217245

218246
continue
219247
case model.Delete, model.Update:
220-
fileMeta := fileAction.File.GetFileMeta()
221-
content := fms.rollbackFileContents[fileMeta.GetName()]
222-
err := fms.fileOperator.Write(ctx, content, fileMeta.GetName(), fileMeta.GetPermissions())
248+
content, err := fms.restoreFiles(fileAction)
223249
if err != nil {
224250
return err
225251
}
226252

227253
// currentFilesOnDisk needs to be updated after rollback action is performed
228-
fileMeta.Hash = files.GenerateHash(content)
229-
fms.currentFilesOnDisk[fileMeta.GetName()] = fileAction.File
254+
fileAction.File.FileMeta.Hash = files.GenerateHash(content)
255+
fms.currentFilesOnDisk[fileAction.File.GetFileMeta().GetName()] = fileAction.File
230256
case model.Unchanged:
231257
fallthrough
232258
default:
@@ -308,20 +334,18 @@ func (fms *FileManagerService) DetermineFileActions(
308334
modifiedFiles map[string]*model.FileCache,
309335
) (
310336
map[string]*model.FileCache,
311-
map[string][]byte,
312337
error,
313338
) {
314339
fms.filesMutex.Lock()
315340
defer fms.filesMutex.Unlock()
316341

317342
fileDiff := make(map[string]*model.FileCache) // Files that have changed, key is file name
318-
fileContents := make(map[string][]byte) // contents of the file, key is file name
319343

320344
_, filesMap, manifestFileErr := fms.manifestFile()
321345

322346
if manifestFileErr != nil {
323347
if !errors.Is(manifestFileErr, os.ErrNotExist) {
324-
return nil, nil, manifestFileErr
348+
return nil, manifestFileErr
325349
}
326350
filesMap = currentFiles
327351
}
@@ -331,25 +355,16 @@ func (fms *FileManagerService) DetermineFileActions(
331355
for fileName, manifestFile := range filesMap {
332356
_, exists := modifiedFiles[fileName]
333357

334-
if !exists {
335-
// Read file contents before marking it deleted
336-
fileContent, readErr := os.ReadFile(fileName)
337-
if readErr != nil {
338-
if errors.Is(readErr, os.ErrNotExist) {
339-
slog.DebugContext(ctx, "Unable to backup file contents since file does not exist", "file", fileName)
340-
continue
341-
}
342-
343-
return nil, nil, fmt.Errorf("error reading file %s: %w", fileName, readErr)
344-
}
345-
fileContents[fileName] = fileContent
358+
if !fms.agentConfig.IsDirectoryAllowed(fileName) {
359+
return nil, fmt.Errorf("error deleting file %s: file not in allowed directories", fileName)
360+
}
346361

347-
// Allowed directories could have been modified since file was created,
348-
// so we should check before marking for deletion
349-
if !fms.agentConfig.IsDirectoryAllowed(fileName) {
350-
return nil, nil, fmt.Errorf("error deleting file %s: file not in allowed directories", fileName)
351-
}
362+
if _, err := os.Stat(fileName); os.IsNotExist(err) {
363+
slog.DebugContext(ctx, "File already deleted, skipping", "file", fileName)
364+
continue
365+
}
352366

367+
if !exists {
353368
fileDiff[fileName] = &model.FileCache{
354369
File: manifestFile,
355370
Action: model.Delete,
@@ -359,7 +374,7 @@ func (fms *FileManagerService) DetermineFileActions(
359374

360375
for _, modifiedFile := range modifiedFiles {
361376
fileName := modifiedFile.File.GetFileMeta().GetName()
362-
currentFile, ok := filesMap[modifiedFile.File.GetFileMeta().GetName()]
377+
currentFile, ok := filesMap[fileName]
363378
// default to unchanged action
364379
modifiedFile.Action = model.Unchanged
365380

@@ -369,25 +384,20 @@ func (fms *FileManagerService) DetermineFileActions(
369384
}
370385
// if file doesn't exist in the current files, file has been added
371386
// set file action
372-
if _, statErr := os.Stat(modifiedFile.File.GetFileMeta().GetName()); errors.Is(statErr, os.ErrNotExist) {
387+
if _, statErr := os.Stat(fileName); errors.Is(statErr, os.ErrNotExist) {
373388
modifiedFile.Action = model.Add
374-
fileDiff[modifiedFile.File.GetFileMeta().GetName()] = modifiedFile
389+
fileDiff[fileName] = modifiedFile
375390

376391
continue
377392
// if file currently exists and file hash is different, file has been updated
378393
// copy contents, set file action
379394
} else if ok && modifiedFile.File.GetFileMeta().GetHash() != currentFile.GetFileMeta().GetHash() {
380-
fileContent, readErr := os.ReadFile(fileName)
381-
if readErr != nil {
382-
return nil, nil, fmt.Errorf("error reading file %s, error: %w", fileName, readErr)
383-
}
384395
modifiedFile.Action = model.Update
385-
fileContents[fileName] = fileContent
386-
fileDiff[modifiedFile.File.GetFileMeta().GetName()] = modifiedFile
396+
fileDiff[fileName] = modifiedFile
387397
}
388398
}
389399

390-
return fileDiff, fileContents, nil
400+
return fileDiff, nil
391401
}
392402

393403
// UpdateCurrentFilesOnDisk updates the FileManagerService currentFilesOnDisk slice which contains the files
@@ -463,6 +473,59 @@ func (fms *FileManagerService) UpdateManifestFile(ctx context.Context,
463473
return fms.fileOperator.WriteManifestFile(ctx, updatedFiles, fms.agentConfig.LibDir, fms.manifestFilePath)
464474
}
465475

476+
func (fms *FileManagerService) backupFiles(ctx context.Context) error {
477+
for _, file := range fms.fileActions {
478+
if file.Action == model.Add || file.Action == model.Unchanged {
479+
continue
480+
}
481+
482+
filePath := file.File.GetFileMeta().GetName()
483+
484+
if _, err := os.Stat(filePath); os.IsNotExist(err) {
485+
slog.DebugContext(ctx, "Unable to backup file content since file does not exist",
486+
"file", filePath)
487+
488+
continue
489+
}
490+
491+
tempFilePath := filepath.Join(fms.tempRollbackDir, filePath)
492+
slog.DebugContext(ctx, "Attempting to backup file content since file exists", "temp_path", tempFilePath)
493+
494+
moveErr := fms.fileOperator.MoveFile(ctx, filePath, tempFilePath)
495+
496+
if moveErr != nil {
497+
return moveErr
498+
}
499+
}
500+
501+
return nil
502+
}
503+
504+
func (fms *FileManagerService) restoreFiles(fileAction *model.FileCache) ([]byte, error) {
505+
fileMeta := fileAction.File.GetFileMeta()
506+
fileName := fileMeta.GetName()
507+
508+
tempFilePath := filepath.Join(fms.tempRollbackDir, fileName)
509+
510+
// Create parent directories for the target file if they don't exist
511+
if err := os.MkdirAll(filepath.Dir(fileName), dirPerm); err != nil {
512+
return nil, fmt.Errorf("failed to create directories for %s: %w", fileName, err)
513+
}
514+
515+
moveErr := os.Rename(tempFilePath, fileName)
516+
if moveErr != nil {
517+
return nil, fmt.Errorf("failed to rename file, %s to %s: %w", tempFilePath, fileName, moveErr)
518+
}
519+
520+
content, readErr := os.ReadFile(fileMeta.GetName())
521+
if readErr != nil {
522+
return nil, fmt.Errorf("error reading file, unable to generate hash: %s error: %w",
523+
fileMeta.GetName(), readErr)
524+
}
525+
526+
return content, nil
527+
}
528+
466529
func (fms *FileManagerService) manifestFile() (map[string]*model.ManifestFile, map[string]*mpi.File, error) {
467530
if _, err := os.Stat(fms.manifestFilePath); err != nil {
468531
return nil, nil, err
@@ -491,17 +554,17 @@ func (fms *FileManagerService) manifestFile() (map[string]*model.ManifestFile, m
491554
return manifestFiles, fileMap, nil
492555
}
493556

494-
func (fms *FileManagerService) executeFileActions(ctx context.Context, tempDir string) (actionError error) {
557+
func (fms *FileManagerService) executeFileActions(ctx context.Context) (actionError error) {
495558
// Download files to temporary location
496-
downloadError := fms.downloadUpdatedFilesToTempLocation(ctx, tempDir)
559+
downloadError := fms.downloadUpdatedFilesToTempLocation(ctx, fms.tempConfigDir)
497560
if downloadError != nil {
498561
return downloadError
499562
}
500563

501564
// Remove temp files if there is a failure moving or deleting files
502-
actionError = fms.moveOrDeleteFiles(ctx, tempDir, actionError)
565+
actionError = fms.moveOrDeleteFiles(ctx, fms.tempConfigDir, actionError)
503566
if actionError != nil {
504-
fms.deleteTempFiles(ctx, tempDir)
567+
fms.deleteTempFiles(ctx, fms.tempConfigDir)
505568
}
506569

507570
return actionError
@@ -528,11 +591,6 @@ func (fms *FileManagerService) downloadUpdatedFilesToTempLocation(
528591
}
529592
}
530593

531-
// Remove temp files if there is an error downloading any files
532-
if updateError != nil {
533-
fms.deleteTempFiles(ctx, tempDir)
534-
}
535-
536594
return updateError
537595
}
538596

@@ -551,7 +609,8 @@ actionsLoop:
551609

552610
continue
553611
case model.Add, model.Update:
554-
err := fms.fileServiceOperator.MoveFilesFromTempDirectory(ctx, fileAction, tempDir)
612+
fileMeta := fileAction.File.GetFileMeta()
613+
err := fms.fileServiceOperator.RenameFile(ctx, fileMeta.GetHash(), fileMeta.GetName(), tempDir)
555614
if err != nil {
556615
actionError = err
557616

@@ -649,17 +708,11 @@ func (fms *FileManagerService) convertToFile(manifestFile *model.ManifestFile) *
649708
}
650709
}
651710

652-
func (fms *FileManagerService) createTempConfigDirectory(ctx context.Context) (string, error) {
653-
tempDir, tempDirError := os.MkdirTemp(fms.agentConfig.LibDir, "config")
711+
func (fms *FileManagerService) createTempConfigDirectory(pattern string) (string, error) {
712+
tempDir, tempDirError := os.MkdirTemp(fms.configPath, pattern)
654713
if tempDirError != nil {
655714
return "", fmt.Errorf("failed creating temp config directory: %w", tempDirError)
656715
}
657-
defer func(path string) {
658-
err := os.RemoveAll(path)
659-
if err != nil {
660-
slog.ErrorContext(ctx, "error removing temp config directory", "path", path, "err", err)
661-
}
662-
}(tempDir)
663716

664717
return tempDir, nil
665718
}

0 commit comments

Comments
 (0)
0