Mark Kirkwood wrote:
> This is a well-worn thread title - apologies, but these results seemed 
> interesting, and hopefully useful in the quest to get better performance 
> on Solaris:
> 
> I was curious to see if the rather uninspiring pgbench performance 
> obtained from a Sun 280R (see General: ATA Disks and RAID controllers 
> for database servers) could be improved if more time was spent 
> tuning.        
> 
> With the help of a fellow workmate who is a bit of a Solaris guy, we 
> decided to have a go.
> 
> The major performance killer appeared to be mounting the filesystem with 
> the logging option. The next most significant seemed to be the choice of 
> sync_method for Pg - the default (open_datasync), which we initially 
> thought should be the best - appears noticeably slower than fdatasync.

I thought the default was fdatasync, but looking at the code it seems
the default is open_datasync if O_DSYNC is available.

I assume the logic is that we usually do only one write() before
fsync(), so open_datasync should be faster.  Why do we not use O_FSYNC
over fsync()?

Looking at the code:
        
        /*
         * Choose the open() flag used for full synchronous writes: O_SYNC
         * when it is defined, otherwise O_FSYNC when that is defined.
         * OPEN_SYNC_FLAG stays undefined if neither exists.
         */
        #if defined(O_SYNC)
        #define OPEN_SYNC_FLAG     O_SYNC
        #else
        #if defined(O_FSYNC)
        #define OPEN_SYNC_FLAG    O_FSYNC
        #endif
        #endif
        
        /*
         * O_DSYNC becomes the data-sync open flag only when a full-sync flag
         * was found above and O_DSYNC has a different value from it.
         */
        #if defined(OPEN_SYNC_FLAG)
        #if defined(O_DSYNC) && (O_DSYNC != OPEN_SYNC_FLAG)
        #define OPEN_DATASYNC_FLAG    O_DSYNC
        #endif
        #endif
        
        /*
         * Default sync method, in order of preference: open_datasync when
         * OPEN_DATASYNC_FLAG is defined, else fdatasync when HAVE_FDATASYNC
         * is defined, else fsync.  Note that an open-time full-sync flag is
         * never chosen as the default here.
         */
        #if defined(OPEN_DATASYNC_FLAG)
        #define DEFAULT_SYNC_METHOD_STR    "open_datasync"
        #define DEFAULT_SYNC_METHOD        SYNC_METHOD_OPEN
        #define DEFAULT_SYNC_FLAGBIT       OPEN_DATASYNC_FLAG
        #else
        #if defined(HAVE_FDATASYNC)
        #define DEFAULT_SYNC_METHOD_STR   "fdatasync"
        #define DEFAULT_SYNC_METHOD       SYNC_METHOD_FDATASYNC
        #define DEFAULT_SYNC_FLAGBIT      0
        #else
        #define DEFAULT_SYNC_METHOD_STR   "fsync"
        #define DEFAULT_SYNC_METHOD       SYNC_METHOD_FSYNC
        #define DEFAULT_SYNC_FLAGBIT      0
        #endif
        #endif

I think the problem is that we prefer O_DSYNC over fdatasync, but do not
prefer O_FSYNC over fsync.

Running the attached test program shows on BSD/OS 4.3:

        write                  0.000360
        write & fsync          0.001391
        write, close & fsync   0.001308
        open o_fsync, write    0.000924

showing O_FSYNC faster than fsync().

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  [EMAIL PROTECTED]               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
/*
 *      test_fsync.c
 *              times a small write under several sync strategies: no sync,
 *              fsync(), fsync() after closing and reopening the file, and
 *              opening the file with O_FSYNC
 */

#include <sys/types.h>
#include <sys/time.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

void die(char *str);
void print_elapse(struct timeval start_t, struct timeval elapse_t);

/* Scratch file created and removed by every timing run. */
#define TEST_FSYNC_FILE "/var/tmp/test_fsync.out"

/* Number of bytes written in each timing run. */
#define TEST_WRITE_SIZE 200

/*
 * Open-time synchronous-write flag: O_FSYNC where the platform defines it,
 * otherwise O_SYNC.  Left undefined when neither exists, in which case the
 * open-sync run is skipped.
 */
#if defined(O_FSYNC)
#define TEST_OPEN_SYNC_FLAG O_FSYNC
#elif defined(O_SYNC)
#define TEST_OPEN_SYNC_FLAG O_SYNC
#endif

/*
 * Open (creating if necessary) the scratch file read/write with any
 * extra_flags OR'ed in, and return the descriptor.  Exits via die() on
 * failure.
 */
static int test_fsync_open(int extra_flags)
{
        /*
         * open() requires a mode argument whenever O_CREAT is given; without
         * it the new file gets indeterminate permission bits and a later
         * reopen may fail.
         */
        int fd = open(TEST_FSYNC_FILE, O_RDWR | O_CREAT | extra_flags, 0600);

        if (fd == -1)
                die("can't open " TEST_FSYNC_FILE);
        return fd;
}

/* Write len bytes from buf to fd; a failed or short write is fatal. */
static void test_fsync_write(int fd, const char *buf, size_t len)
{
        if (write(fd, buf, len) != (ssize_t) len)
                die("can't write " TEST_FSYNC_FILE);
}

/* fsync() fd; failure is fatal because the timing would be meaningless. */
static void test_fsync_sync(int fd)
{
        if (fsync(fd) != 0)
                die("can't fsync " TEST_FSYNC_FILE);
}

/* close() fd; failure is fatal. */
static void test_fsync_close(int fd)
{
        if (close(fd) != 0)
                die("can't close " TEST_FSYNC_FILE);
}

/* Print one result line: the label followed by the elapsed time. */
static void test_fsync_report(const char *label, struct timeval start_t,
                                                          struct timeval stop_t)
{
        printf("%s", label);
        print_elapse(start_t, stop_t);
        printf("\n");
}

/*
 * Time four ways of writing TEST_WRITE_SIZE bytes to a scratch file and
 * print the elapsed wall-clock time of each:
 *   1. write only
 *   2. write, then fsync on the same descriptor
 *   3. write, close, reopen, then fsync on the new descriptor
 *   4. write to a file opened with the synchronous-write flag
 * Each measurement covers open through close; the unlink that follows is
 * deliberately outside the timed region.  Returns 0 on success; any I/O
 * failure terminates the program through die().
 */
int main(int argc, char *argv[])
{
        struct timeval start_t;
        struct timeval elapse_t;
        char strout[TEST_WRITE_SIZE];
        int fd;

        (void) argc;
        (void) argv;

        /* payload: TEST_WRITE_SIZE 'a' characters (not NUL-terminated) */
        memset(strout, 'a', sizeof strout);

        /* write only */
        gettimeofday(&start_t, NULL);
        fd = test_fsync_open(0);
        test_fsync_write(fd, strout, sizeof strout);
        test_fsync_close(fd);
        gettimeofday(&elapse_t, NULL);
        unlink(TEST_FSYNC_FILE);
        test_fsync_report("write                  ", start_t, elapse_t);

        /* write & fsync */
        gettimeofday(&start_t, NULL);
        fd = test_fsync_open(0);
        test_fsync_write(fd, strout, sizeof strout);
        test_fsync_sync(fd);
        test_fsync_close(fd);
        gettimeofday(&elapse_t, NULL);
        unlink(TEST_FSYNC_FILE);
        test_fsync_report("write & fsync          ", start_t, elapse_t);

        /* write, close & fsync */
        gettimeofday(&start_t, NULL);
        fd = test_fsync_open(0);
        test_fsync_write(fd, strout, sizeof strout);
        test_fsync_close(fd);
        /* reopen file so the fsync is issued on a different descriptor */
        fd = test_fsync_open(0);
        test_fsync_sync(fd);
        test_fsync_close(fd);
        gettimeofday(&elapse_t, NULL);
        unlink(TEST_FSYNC_FILE);
        test_fsync_report("write, close & fsync   ", start_t, elapse_t);

        /* open_fsync, write */
#if defined(TEST_OPEN_SYNC_FLAG)
        gettimeofday(&start_t, NULL);
        fd = test_fsync_open(TEST_OPEN_SYNC_FLAG);
        test_fsync_write(fd, strout, sizeof strout);
        test_fsync_close(fd);
        gettimeofday(&elapse_t, NULL);
        unlink(TEST_FSYNC_FILE);
        test_fsync_report("open o_fsync, write    ", start_t, elapse_t);
#else
        /* neither O_FSYNC nor O_SYNC exists on this platform */
        printf("open o_fsync, write    n/a\n");
#endif

        return 0;
}

/*
 * Print the interval from start_t to elapse_t on stdout in the form
 * "seconds.microseconds", with the microseconds zero-padded to six digits.
 * No trailing newline is written.
 */
void print_elapse(struct timeval start_t, struct timeval elapse_t)
{
        long secs = (long) (elapse_t.tv_sec - start_t.tv_sec);
        long usecs = (long) (elapse_t.tv_usec - start_t.tv_usec);

        /* borrow one second when the microsecond part went negative */
        if (usecs < 0)
        {
                secs -= 1;
                usecs += 1000000;
        }

        printf("%ld.%06ld", secs, usecs);
}

/*
 * Report a fatal error and terminate: writes str followed by a newline to
 * stderr, then exits with status 1.  Does not return.
 */
void die(char *str)
{
        /* newline-terminate so the message does not run into the shell prompt */
        fprintf(stderr, "%s\n", str);
        exit(1);
}
---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

Reply via email to