? GNUmakefile
? config.log
? config.status
? contrib/pg_standby/pg_standby
? contrib/pgbench/pgbench
? src/Makefile.global
? src/backend/postgres
? src/backend/catalog/postgres.bki
? src/backend/catalog/postgres.description
? src/backend/catalog/postgres.shdescription
? src/backend/snowball/snowball_create.sql
? src/backend/utils/probes.h
? src/backend/utils/mb/conversion_procs/conversion_create.sql
? src/bin/initdb/initdb
? src/bin/pg_config/pg_config
? src/bin/pg_controldata/pg_controldata
? src/bin/pg_ctl/pg_ctl
? src/bin/pg_dump/pg_dump
? src/bin/pg_dump/pg_dumpall
? src/bin/pg_dump/pg_restore
? src/bin/pg_resetxlog/pg_resetxlog
? src/bin/psql/psql
? src/bin/scripts/clusterdb
? src/bin/scripts/createdb
? src/bin/scripts/createlang
? src/bin/scripts/createuser
? src/bin/scripts/dropdb
? src/bin/scripts/droplang
? src/bin/scripts/dropuser
? src/bin/scripts/reindexdb
? src/bin/scripts/vacuumdb
? src/include/pg_config.h
? src/include/stamp-h
? src/interfaces/ecpg/compatlib/exports.list
? src/interfaces/ecpg/compatlib/libecpg_compat.so.3.1
? src/interfaces/ecpg/ecpglib/exports.list
? src/interfaces/ecpg/ecpglib/libecpg.so.6.1
? src/interfaces/ecpg/include/ecpg_config.h
? src/interfaces/ecpg/include/stamp-h
? src/interfaces/ecpg/pgtypeslib/exports.list
? src/interfaces/ecpg/pgtypeslib/libpgtypes.so.3.1
? src/interfaces/ecpg/preproc/ecpg
? src/interfaces/libpq/exports.list
? src/interfaces/libpq/libpq.so.5.2
? src/port/pg_config_paths.h
? src/test/regress/pg_regress
? src/test/regress/testtablespace
? src/timezone/zic
Index: contrib/pg_standby/pg_standby.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/pg_standby/pg_standby.c,v
retrieving revision 1.20
diff -c -r1.20 pg_standby.c
*** contrib/pg_standby/pg_standby.c	18 Mar 2009 20:30:35 -0000	1.20
--- contrib/pg_standby/pg_standby.c	25 Mar 2009 06:26:40 -0000
***************
*** 52,65 ****
  int			keepfiles = 0;		/* number of WAL files to keep, 0 keep all */
  int			maxretries = 3;		/* number of retries on restore command */
  bool		debug = false;		/* are we debugging? */
! bool		triggered = false;	/* have we been triggered? */
  bool		need_cleanup = false;		/* do we need to remove files from
  										 * archive? */
  
  static volatile sig_atomic_t signaled = false;
  
  char	   *archiveLocation;	/* where to find the archive? */
- char	   *triggerPath;		/* where to find the trigger file? */
  char	   *xlogFilePath;		/* where we are going to restore to */
  char	   *nextWALFileName;	/* the file we need to get from archive */
  char	   *restartWALFileName; /* the file from which we can restart restore */
--- 52,64 ----
  int			keepfiles = 0;		/* number of WAL files to keep, 0 keep all */
  int			maxretries = 3;		/* number of retries on restore command */
  bool		debug = false;		/* are we debugging? */
! bool		triggered = false;	/* have we been triggered (cancel)? */
  bool		need_cleanup = false;		/* do we need to remove files from
  										 * archive? */
  
  static volatile sig_atomic_t signaled = false;
  
  char	   *archiveLocation;	/* where to find the archive? */
  char	   *xlogFilePath;		/* where we are going to restore to */
  char	   *nextWALFileName;	/* the file we need to get from archive */
  char	   *restartWALFileName; /* the file from which we can restart restore */
***************
*** 69,74 ****
--- 68,92 ----
  char		exclusiveCleanupFileName[MAXPGPATH];		/* the file we need to
  														 * get from archive */
  
+ /*
+  * Where to find the trigger file?
+  *
+  * Two types (finish and cancel) of trigger files are supported.
+  *
+  * When the "finish" trigger file exists, pg_standby itself acts as a cp or ln
+  * command, and recovery does not finish until all the available WAL files have
+  * been replayed. No committed transaction in the available WAL is lost, but it
+  * might take some time before recovery finishes.
+  *
+  * On the other hand, the existence of the "cancel" trigger file causes recovery
+  * to end immediately, even if available WAL files remain. So, some transactions
+  * might be lost.
+  *
+  * When both exist, the "cancel" trigger file takes precedence over the "finish" one.
+  */
+ char	*finishTriggerPath;
+ char	*cancelTriggerPath;
+ 
  #define RESTORE_COMMAND_COPY 0
  #define RESTORE_COMMAND_LINK 1
  int			restoreCommandType;
***************
*** 355,364 ****
   *	  Is there a trigger file?
   */
  static bool
! CheckForExternalTrigger(void)
  {
  	int			rc;
  
  	/*
  	 * Look for a trigger file, if that option has been selected
  	 *
--- 373,389 ----
   *	  Is there a trigger file?
   */
  static bool
! CheckForExternalTrigger(char *triggerPath, bool delete_trigger)
  {
  	int			rc;
  
+ 	if (debug)
+  	{
+  		if (triggerPath)
+  			fprintf(stderr, " Checking for trigger file...: %s", triggerPath);
+  		fflush(stderr);
+  	}
+ 
  	/*
  	 * Look for a trigger file, if that option has been selected
  	 *
***************
*** 367,388 ****
  	 */
  	if (triggerPath && stat(triggerPath, &stat_buf) == 0)
  	{
! 		fprintf(stderr, "trigger file found\n");
  		fflush(stderr);
  
! 		/*
! 		 * If trigger file found, we *must* delete it. Here's why: When
! 		 * recovery completes, we will be asked again for the same file from
! 		 * the archive using pg_standby so must remove trigger file so we can
! 		 * reload file again and come up correctly.
! 		 */
! 		rc = unlink(triggerPath);
! 		if (rc != 0)
  		{
! 			fprintf(stderr, "\n ERROR: could not remove \"%s\": %s", triggerPath, strerror(errno));
! 			fflush(stderr);
! 			exit(rc);
  		}
  		return true;
  	}
  
--- 392,411 ----
  	 */
  	if (triggerPath && stat(triggerPath, &stat_buf) == 0)
  	{
! 		fprintf(stderr, "trigger file found: %s\n", triggerPath);
  		fflush(stderr);
  
! 		if (delete_trigger)
  		{
! 			rc = unlink(triggerPath);
! 			if (rc != 0)
! 			{
! 				fprintf(stderr, "\n ERROR: could not remove \"%s\": %s", triggerPath, strerror(errno));
! 				fflush(stderr);
! 				exit(rc);
! 			}
  		}
+ 		
  		return true;
  	}
  
***************
*** 450,456 ****
  		   "                     (default=3)\n");
  	printf("  -s SLEEPTIME       seconds to wait between file checks (min=1, max=60,\n"
  		   "                     default=5)\n");
! 	printf("  -t TRIGGERFILE     defines a trigger file to initiate failover (no default)\n");
  	printf("  -w MAXWAITTIME     max seconds to wait for a file (0=no limit) (default=0)\n");
  	printf("  --help             show this help, then exit\n");
  	printf("  --version          output version information, then exit\n");
--- 473,480 ----
  		   "                     (default=3)\n");
  	printf("  -s SLEEPTIME       seconds to wait between file checks (min=1, max=60,\n"
  		   "                     default=5)\n");
! 	printf("  -t CANCELTRIGGERFILE	defines a trigger file to cancel recovery (no default)\n");
! 	printf("  -T FINISHTRIGGERFILE	defines a trigger file to finish recovery (no default)\n");
  	printf("  -w MAXWAITTIME     max seconds to wait for a file (0=no limit) (default=0)\n");
  	printf("  --help             show this help, then exit\n");
  	printf("  --version          output version information, then exit\n");
***************
*** 513,519 ****
  	(void) signal(SIGQUIT, sigquit_handler);
  #endif
  
! 	while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
  	{
  		switch (c)
  		{
--- 537,543 ----
  	(void) signal(SIGQUIT, sigquit_handler);
  #endif
  
! 	while ((c = getopt(argc, argv, "cdk:lr:s:t:T:w:")) != -1)
  	{
  		switch (c)
  		{
***************
*** 550,560 ****
  					exit(2);
  				}
  				break;
! 			case 't':			/* Trigger file */
! 				triggerPath = optarg;
! 				if (CheckForExternalTrigger())
  					exit(1);	/* Normal exit, with non-zero */
  				break;
  			case 'w':			/* Max wait time */
  				maxwaittime = atoi(optarg);
  				if (maxwaittime < 0)
--- 574,587 ----
  					exit(2);
  				}
  				break;
! 			case 't':			/* Cancel trigger file */
! 				cancelTriggerPath = optarg;
! 				if (CheckForExternalTrigger(cancelTriggerPath, true))
  					exit(1);	/* Normal exit, with non-zero */
  				break;
+ 			case 'T':			/* Finish trigger file */
+ 				finishTriggerPath = optarg;
+ 				break;
  			case 'w':			/* Max wait time */
  				maxwaittime = atoi(optarg);
  				if (maxwaittime < 0)
***************
*** 633,639 ****
  
  	if (debug)
  	{
! 		fprintf(stderr, "\nTrigger file 		: %s", triggerPath ? triggerPath : "<not set>");
  		fprintf(stderr, "\nWaiting for WAL file	: %s", nextWALFileName);
  		fprintf(stderr, "\nWAL file path		: %s", WALFilePath);
  		fprintf(stderr, "\nRestoring to...		: %s", xlogFilePath);
--- 660,669 ----
  
  	if (debug)
  	{
! 		fprintf(stderr, "\nCancel trigger file 		: %s",
! 				cancelTriggerPath ? cancelTriggerPath : "<not set>");
! 		fprintf(stderr, "\nFinish trigger file 		: %s",
! 				finishTriggerPath ? finishTriggerPath : "<not set>");
  		fprintf(stderr, "\nWaiting for WAL file	: %s", nextWALFileName);
  		fprintf(stderr, "\nWAL file path		: %s", WALFilePath);
  		fprintf(stderr, "\nRestoring to...		: %s", xlogFilePath);
***************
*** 673,724 ****
  		}
  	}
  
! 	/*
! 	 * Main wait loop
! 	 */
! 	while (!CustomizableNextWALFileReady() && !triggered)
! 	{
! 		if (sleeptime <= 60)
! 			pg_usleep(sleeptime * 1000000L);
! 
! 		if (signaled)
  		{
! 			triggered = true;
! 			if (debug)
  			{
! 				fprintf(stderr, "\nsignaled to exit\n");
! 				fflush(stderr);
  			}
! 		}
! 		else
! 		{
! 
! 			if (debug)
  			{
! 				fprintf(stderr, "\nWAL file not present yet.");
! 				if (triggerPath)
! 					fprintf(stderr, " Checking for trigger file...");
! 				fflush(stderr);
  			}
  
! 			waittime += sleeptime;
! 
! 			if (!triggered && (CheckForExternalTrigger() || (waittime >= maxwaittime && maxwaittime > 0)))
! 			{
! 				triggered = true;
! 				if (debug && waittime >= maxwaittime && maxwaittime > 0)
! 					fprintf(stderr, "\nTimed out after %d seconds\n", waittime);
! 			}
  		}
  	}
  
  	/*
- 	 * Action on exit
- 	 */
- 	if (triggered)
- 		exit(1);				/* Normal exit, with non-zero */
- 
- 	/*
  	 * Once we have restored this file successfully we can remove some prior
  	 * WAL files. If this restore fails we musn't remove any file because some
  	 * of them will be requested again immediately after the failed restore,
--- 703,771 ----
  		}
  	}
  
!   	/*
!  	 * If "finish" trigger file exists, we would skip the wait loop and try to
!  	 * restore the log, which makes pg_standby act as cp or ln command.
!  	 */
!  	if (!CheckForExternalTrigger(finishTriggerPath, false))
!   	{
! 		/*
! 		 * Main wait loop
! 		 */
! 		while (!CustomizableNextWALFileReady() && !triggered)
  		{
! 			if (sleeptime <= 60)
! 				pg_usleep(sleeptime * 1000000L);
! 			
! 			if (signaled)
  			{
! 				triggered = true;
! 				if (debug)
! 				{
! 					fprintf(stderr, "\nsignaled to exit\n");
! 					fflush(stderr);
! 				}
  			}
! 			else
  			{
! 				if (debug)
! 				{
! 					fprintf(stderr, "\nWAL file not present yet.");
! 					fflush(stderr);
! 				}
! 				
! 				waittime += sleeptime;
! 				
! 				/*
! 				 * If "cancel" trigger file found, we *must* delete it. Here's why: When
! 				 * recovery finishes, we will be asked again for the same file from
! 				 * the archive using pg_standby so must remove trigger file so we can
! 				 * reload file again and come up correctly.
! 				 */
! 				if (!triggered && (CheckForExternalTrigger(cancelTriggerPath, true) ||
! 								   (waittime >= maxwaittime && maxwaittime > 0)))
! 				{
! 					triggered = true;
! 					if (debug && waittime >= maxwaittime && maxwaittime > 0)
! 						fprintf(stderr, "\nTimed out after %d seconds\n", waittime);
! 				}
  			}
  
! 			/*
!  			 * Action on exit
!  			 */
!  			if (triggered)
!  				exit(1);				/* Normal exit, with non-zero */
! 			
!  			/*
!  			 * If the "finish" trigger file exists, stop waiting and restore the log now.
!  			 */
!  			if (CheckForExternalTrigger(finishTriggerPath, false))
!  				break;
  		}
  	}
  
  	/*
  	 * Once we have restored this file successfully we can remove some prior
  	 * WAL files. If this restore fails we musn't remove any file because some
  	 * of them will be requested again immediately after the failed restore,
Index: doc/src/sgml/pgstandby.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/pgstandby.sgml,v
retrieving revision 2.7
diff -c -r2.7 pgstandby.sgml
*** doc/src/sgml/pgstandby.sgml	27 Feb 2009 09:30:21 -0000	2.7
--- doc/src/sgml/pgstandby.sgml	25 Mar 2009 06:26:40 -0000
***************
*** 174,180 ****
        </entry>
       </row>
       <row>
!       <entry><literal>-t</> <replaceable>triggerfile</></entry>
        <entry>none</entry>
        <entry>
         Specify a trigger file whose presence should cause recovery to end
--- 174,180 ----
        </entry>
       </row>
       <row>
!       <entry><literal>-t</> <replaceable>canceltrigger</></entry>
        <entry>none</entry>
        <entry>
         Specify a trigger file whose presence should cause recovery to end
***************
*** 182,188 ****
         It is recommended that you use a structured filename to
         avoid confusion as to which server is being triggered
         when multiple servers exist on the same system; for example
!        <filename>/tmp/pgsql.trigger.5432</>.
        </entry>
       </row>
       <row>
--- 182,201 ----
         It is recommended that you use a structured filename to
         avoid confusion as to which server is being triggered
         when multiple servers exist on the same system; for example
!        <filename>/tmp/pgsql.cancel.5442</>.
!        Note that the trigger file is deleted once it has been detected.
!       </entry>
!      </row>
!      <row>
!       <entry><literal>-T</> <replaceable>finishtrigger</></entry>
!       <entry>none</entry>
!       <entry>
!        Specify a trigger file whose presence should cause recovery to end
!        after replaying all the available WAL files.
!        As with <literal>-t</>, a structured filename is recommended.
!        <literal>-T</> is ignored if it names the same trigger file as
!        <literal>-t</>.
!        Note that this trigger file is not deleted and remains after recovery.
        </entry>
       </row>
       <row>
***************
*** 209,215 ****
    <programlisting>
  archive_command = 'cp %p .../archive/%f'
  
! restore_command = 'pg_standby -l -d -s 2 -t /tmp/pgsql.trigger.5442 .../archive %f %p %r 2>>standby.log'
    </programlisting>
    <para>
     where the archive directory is physically located on the standby server,
--- 222,228 ----
    <programlisting>
  archive_command = 'cp %p .../archive/%f'
  
! restore_command = 'pg_standby -l -d -s 2 -t /tmp/pgsql.cancel.5442 -T /tmp/pgsql.finish.5442 .../archive %f %p %r 2>>standby.log'
    </programlisting>
    <para>
     where the archive directory is physically located on the standby server,
***************
*** 236,242 ****
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>/tmp/pgsql.trigger.5442</> appears
      </para>
     </listitem>
     <listitem>
--- 249,263 ----
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>/tmp/pgsql.cancel.5442</> appears,
!      then end recovery immediately, even if WAL files are still available
!     </para>
!    </listitem>
!    <listitem>
!     <para>
!      stop waiting only when a trigger file called
!      <filename>/tmp/pgsql.finish.5442</> appears,
!      then restore the available WAL file if it exists
      </para>
     </listitem>
     <listitem>
***************
*** 251,257 ****
    <programlisting>
  archive_command = 'copy %p ...\\archive\\%f'
  
! restore_command = 'pg_standby -d -s 5 -t C:\pgsql.trigger.5442 ...\archive %f %p %r 2>>standby.log'
    </programlisting>
    <para>
     Note that backslashes need to be doubled in the
--- 272,278 ----
    <programlisting>
  archive_command = 'copy %p ...\\archive\\%f'
  
! restore_command = 'pg_standby -d -s 5 -t C:\pgsql.cancel.5442 -T C:\pgsql.finish.5442 ...\archive %f %p %r 2>>standby.log'
    </programlisting>
    <para>
     Note that backslashes need to be doubled in the
***************
*** 277,283 ****
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>C:\pgsql.trigger.5442</> appears
      </para>
     </listitem>
     <listitem>
--- 298,312 ----
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>C:\pgsql.cancel.5442</> appears,
!      then end recovery immediately, even if WAL files are still available
!     </para>
!    </listitem>
!    <listitem>
!     <para>
!      stop waiting only when a trigger file called
!      <filename>C:\pgsql.finish.5442</> appears,
!      then restore the available WAL file if it exists
      </para>
     </listitem>
     <listitem>
