28
28
#include <time.h>
29
29
#include <sys/types.h>
30
30
#include <sys/stat.h>
31
+ #include <sys/wait.h>
31
32
#include <unistd.h>
32
33
33
34
#ifdef HAVE_SYS_RESOURCE_H
@@ -149,10 +150,10 @@ static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo,
149
150
150
151
static pgpid_t get_pgpid (void );
151
152
static char * * readfile (const char * path );
152
- static int start_postmaster (void );
153
+ static pgpid_t start_postmaster (void );
153
154
static void read_post_opts (void );
154
155
155
- static PGPing test_postmaster_connection (bool );
156
+ static PGPing test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint );
156
157
static bool postmaster_is_alive (pid_t pid );
157
158
158
159
#if defined(HAVE_GETRLIMIT ) && defined(RLIMIT_CORE )
@@ -396,36 +397,73 @@ readfile(const char *path)
396
397
* start/test/stop routines
397
398
*/
398
399
399
- static int
400
+ /*
401
+ * Start the postmaster and return its PID.
402
+ *
403
+ * Currently, on Windows what we return is the PID of the shell process
404
+ * that launched the postmaster (and, we trust, is waiting for it to exit).
405
+ * So the PID is usable for "is the postmaster still running" checks,
406
+ * but cannot be compared directly to postmaster.pid.
407
+ *
408
+ * On Windows, we also save aside a handle to the shell process in
409
+ * "postmasterProcess", which the caller should close when done with it.
410
+ */
411
+ static pgpid_t
400
412
start_postmaster (void )
401
413
{
402
414
char cmd [MAXPGPATH ];
403
415
404
416
#ifndef WIN32
417
+ pgpid_t pm_pid ;
418
+
419
+ /* Flush stdio channels just before fork, to avoid double-output problems */
420
+ fflush (stdout );
421
+ fflush (stderr );
422
+
423
+ pm_pid = fork ();
424
+ if (pm_pid < 0 )
425
+ {
426
+ /* fork failed */
427
+ write_stderr (_ ("%s: could not start server: %s\n" ),
428
+ progname , strerror (errno ));
429
+ exit (1 );
430
+ }
431
+ if (pm_pid > 0 )
432
+ {
433
+ /* fork succeeded, in parent */
434
+ return pm_pid ;
435
+ }
436
+
437
+ /* fork succeeded, in child */
405
438
406
439
/*
407
440
* Since there might be quotes to handle here, it is easier simply to pass
408
- * everything to a shell to process them.
409
- *
410
- * XXX it would be better to fork and exec so that we would know the child
411
- * postmaster's PID directly; then test_postmaster_connection could use
412
- * the PID without having to rely on reading it back from the pidfile.
441
+ * everything to a shell to process them. Use exec so that the postmaster
442
+ * has the same PID as the current child process.
413
443
*/
414
444
if (log_file != NULL )
415
- snprintf (cmd , MAXPGPATH , SYSTEMQUOTE " \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 &" SYSTEMQUOTE ,
445
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1" ,
416
446
exec_path , pgdata_opt , post_opts ,
417
447
DEVNULL , log_file );
418
448
else
419
- snprintf (cmd , MAXPGPATH , SYSTEMQUOTE " \"%s\" %s%s < \"%s\" 2>&1 &" SYSTEMQUOTE ,
449
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" 2>&1" ,
420
450
exec_path , pgdata_opt , post_opts , DEVNULL );
421
451
422
- return system (cmd );
452
+ (void ) execl ("/bin/sh" , "/bin/sh" , "-c" , cmd , (char * ) NULL );
453
+
454
+ /* exec failed */
455
+ write_stderr (_ ("%s: could not start server: %s\n" ),
456
+ progname , strerror (errno ));
457
+ exit (1 );
458
+
459
+ return 0 ; /* keep dumb compilers quiet */
460
+
423
461
#else /* WIN32 */
424
462
425
463
/*
426
- * On win32 we don't use system(). So we don't need to use & (which would
427
- * be START /B on win32). However, we still call the shell ( CMD.EXE) with
428
- * it to handle redirection etc .
464
+ * As with the Unix case, it's easiest to use the shell (CMD.EXE) to
465
+ * handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
466
+ * "exec", so we don't get to find out the postmaster's PID immediately .
429
467
*/
430
468
PROCESS_INFORMATION pi ;
431
469
@@ -437,10 +475,15 @@ start_postmaster(void)
437
475
exec_path , pgdata_opt , post_opts , DEVNULL );
438
476
439
477
if (!CreateRestrictedProcess (cmd , & pi , false))
440
- return GetLastError ();
441
- CloseHandle (pi .hProcess );
478
+ {
479
+ write_stderr (_ ("%s: could not start server: error code %lu\n" ),
480
+ progname , (unsigned long ) GetLastError ());
481
+ exit (1 );
482
+ }
483
+ /* Don't close command process handle here; caller must do so */
484
+ postmasterProcess = pi .hProcess ;
442
485
CloseHandle (pi .hThread );
443
- return 0 ;
486
+ return pi . dwProcessId ; /* Shell's PID, not postmaster's! */
444
487
#endif /* WIN32 */
445
488
}
446
489
@@ -449,15 +492,21 @@ start_postmaster(void)
449
492
/*
450
493
* Find the pgport and try a connection
451
494
*
495
+ * On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
496
+ * it may be the PID of an ancestor shell process, so we can't check the
497
+ * contents of postmaster.pid quite as carefully.
498
+ *
499
+ * On Windows, the static variable postmasterProcess is an implicit argument
500
+ * to this routine; it contains a handle to the postmaster process or an
501
+ * ancestor shell process thereof.
502
+ *
452
503
* Note that the checkpoint parameter enables a Windows service control
453
504
* manager checkpoint, it's got nothing to do with database checkpoints!!
454
505
*/
455
506
static PGPing
456
- test_postmaster_connection (bool do_checkpoint )
507
+ test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint )
457
508
{
458
509
PGPing ret = PQPING_NO_RESPONSE ;
459
- bool found_stale_pidfile = false;
460
- pgpid_t pm_pid = 0 ;
461
510
char connstr [MAXPGPATH * 2 + 256 ];
462
511
int i ;
463
512
@@ -512,29 +561,27 @@ test_postmaster_connection(bool do_checkpoint)
512
561
optlines [5 ] != NULL )
513
562
{
514
563
/* File is complete enough for us, parse it */
515
- long pmpid ;
564
+ pgpid_t pmpid ;
516
565
time_t pmstart ;
517
566
518
567
/*
519
- * Make sanity checks. If it's for a standalone backend
520
- * (negative PID), or the recorded start time is before
521
- * pg_ctl started, then either we are looking at the wrong
522
- * data directory, or this is a pre-existing pidfile that
523
- * hasn't (yet?) been overwritten by our child postmaster.
524
- * Allow 2 seconds slop for possible cross-process clock
525
- * skew.
568
+ * Make sanity checks. If it's for the wrong PID, or the
569
+ * recorded start time is before pg_ctl started, then
570
+ * either we are looking at the wrong data directory, or
571
+ * this is a pre-existing pidfile that hasn't (yet?) been
572
+ * overwritten by our child postmaster. Allow 2 seconds
573
+ * slop for possible cross-process clock skew.
526
574
*/
527
575
pmpid = atol (optlines [LOCK_FILE_LINE_PID - 1 ]);
528
576
pmstart = atol (optlines [LOCK_FILE_LINE_START_TIME - 1 ]);
529
- if (pmpid <= 0 || pmstart < start_time - 2 )
530
- {
531
- /*
532
- * Set flag to report stale pidfile if it doesn't get
533
- * overwritten before we give up waiting.
534
- */
535
- found_stale_pidfile = true;
536
- }
537
- else
577
+ if (pmstart >= start_time - 2 &&
578
+ #ifndef WIN32
579
+ pmpid == pm_pid
580
+ #else
581
+ /* Windows can only reject standalone-backend PIDs */
582
+ pmpid > 0
583
+ #endif
584
+ )
538
585
{
539
586
/*
540
587
* OK, seems to be a valid pidfile from our child.
@@ -544,9 +591,6 @@ test_postmaster_connection(bool do_checkpoint)
544
591
char * hostaddr ;
545
592
char host_str [MAXPGPATH ];
546
593
547
- found_stale_pidfile = false;
548
- pm_pid = (pgpid_t ) pmpid ;
549
-
550
594
/*
551
595
* Extract port number and host string to use. Prefer
552
596
* using Unix socket if available.
@@ -605,37 +649,23 @@ test_postmaster_connection(bool do_checkpoint)
605
649
}
606
650
607
651
/*
608
- * The postmaster should create postmaster.pid very soon after being
609
- * started. If it's not there after we've waited 5 or more seconds,
610
- * assume startup failed and give up waiting. (Note this covers both
611
- * cases where the pidfile was never created, and where it was created
612
- * and then removed during postmaster exit.) Also, if there *is* a
613
- * file there but it appears stale, issue a suitable warning and give
614
- * up waiting.
652
+ * Check whether the child postmaster process is still alive. This
653
+ * lets us exit early if the postmaster fails during startup.
654
+ *
655
+ * On Windows, we may be checking the postmaster's parent shell, but
656
+ * that's fine for this purpose.
615
657
*/
616
- if ( i >= 5 )
658
+ #ifndef WIN32
617
659
{
618
- struct stat statbuf ;
619
-
620
- if (stat (pid_file , & statbuf ) != 0 )
621
- return PQPING_NO_RESPONSE ;
660
+ int exitstatus ;
622
661
623
- if (found_stale_pidfile )
624
- {
625
- write_stderr (_ ("\n%s: this data directory appears to be running a pre-existing postmaster\n" ),
626
- progname );
662
+ if (waitpid ((pid_t ) pm_pid , & exitstatus , WNOHANG ) == (pid_t ) pm_pid )
627
663
return PQPING_NO_RESPONSE ;
628
- }
629
664
}
630
-
631
- /*
632
- * If we've been able to identify the child postmaster's PID, check
633
- * the process is still alive. This covers cases where the postmaster
634
- * successfully created the pidfile but then crashed without removing
635
- * it.
636
- */
637
- if (pm_pid > 0 && !postmaster_is_alive ((pid_t ) pm_pid ))
665
+ #else
666
+ if (WaitForSingleObject (postmasterProcess , 0 ) == WAIT_OBJECT_0 )
638
667
return PQPING_NO_RESPONSE ;
668
+ #endif
639
669
640
670
/* No response, or startup still in process; wait */
641
671
#if defined(WIN32 )
@@ -798,7 +828,7 @@ static void
798
828
do_start (void )
799
829
{
800
830
pgpid_t old_pid = 0 ;
801
- int exitcode ;
831
+ pgpid_t pm_pid ;
802
832
803
833
if (ctl_command != RESTART_COMMAND )
804
834
{
@@ -838,19 +868,13 @@ do_start(void)
838
868
}
839
869
#endif
840
870
841
- exitcode = start_postmaster ();
842
- if (exitcode != 0 )
843
- {
844
- write_stderr (_ ("%s: could not start server: exit code was %d\n" ),
845
- progname , exitcode );
846
- exit (1 );
847
- }
871
+ pm_pid = start_postmaster ();
848
872
849
873
if (do_wait )
850
874
{
851
875
print_msg (_ ("waiting for server to start..." ));
852
876
853
- switch (test_postmaster_connection (false))
877
+ switch (test_postmaster_connection (pm_pid , false))
854
878
{
855
879
case PQPING_OK :
856
880
print_msg (_ (" done\n" ));
@@ -876,6 +900,12 @@ do_start(void)
876
900
}
877
901
else
878
902
print_msg (_ ("server starting\n" ));
903
+
904
+ #ifdef WIN32
905
+ /* Now we don't need the handle to the shell process anymore */
906
+ CloseHandle (postmasterProcess );
907
+ postmasterProcess = INVALID_HANDLE_VALUE ;
908
+ #endif
879
909
}
880
910
881
911
@@ -1479,7 +1509,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1479
1509
if (do_wait )
1480
1510
{
1481
1511
write_eventlog (EVENTLOG_INFORMATION_TYPE , _ ("Waiting for server startup...\n" ));
1482
- if (test_postmaster_connection (true) != PQPING_OK )
1512
+ if (test_postmaster_connection (postmasterPID , true) != PQPING_OK )
1483
1513
{
1484
1514
write_eventlog (EVENTLOG_ERROR_TYPE , _ ("Timed out waiting for server startup\n" ));
1485
1515
pgwin32_SetServiceStatus (SERVICE_STOPPED );
@@ -1506,10 +1536,9 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1506
1536
{
1507
1537
/*
1508
1538
* status.dwCheckPoint can be incremented by
1509
- * test_postmaster_connection(true), so it might not
1510
- * start from 0.
1539
+ * test_postmaster_connection(), so it might not start from 0.
1511
1540
*/
1512
- int maxShutdownCheckPoint = status .dwCheckPoint + 12 ; ;
1541
+ int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;
1513
1542
1514
1543
kill (postmasterPID , SIGINT );
1515
1544
0 commit comments