Indexing is disk-intensive rather than CPU-intensive,
meaning that disk throughput, access times
and file I/O performance are the limiting
factors.

For this reason, splitting it across multiple
threads may only help if you have a very fast
disk, such as an SSD, and even that needs testing
to confirm.

I didn't test it, but I imagine multiple
threads can even have a negative effect, since
there is much more disk and cache thrashing,
and the written indexes will also be much
more fragmented on most file systems.

It is almost like trying to duplicate files
on the same disk using multiple threads.

Viktor

On 2010 May 10, at 11:20, rafa wrote:

> Hello,
> I'm trying to do a reindexing using threads, but according to tests
> under Windows Vista,
> Intel Quad Core, 4 GB RAM, the results are not entirely satisfactory;
> 
> The basic idea is to choose the number of threads we want to
> start,
> N_THREADS, which is passed as a parameter so that tests can be run.
> 
> Well, times;
> 1 thread    = 29 minutes THE WINNER!
> 4 threads  = 93 minutes WoW!
> 7 threads  = 68 minutes
> 5 Threads = 64 minutes
> 
> Either Harbour is doing something that penalizes indexing, or the code
> below has
> something in it that penalizes it excessively.
> 
> ---------------- SPANISH -------------------------------------------
> Hola,
> 
> Estoy intentando hacer una re indexación usando hilos, pero según
> pruebas bajo Windows Vista,
> Intel Quard Core, 4 Gigas RAM, los test no son del todo satisfactorios;
> 
> La idea básica es determinar la cantidad de hilos que queremos poner en
> marcha,
> N_THREADS, que es pasado por parámetro, para poder realizar pruebas.
> 
> Pues bien, los tiempos ;
> 1 Hilo = 29 minutos  THE WINNER!
> 4 Hilos = 93 minutos WoW!!
> 7 hilos = 68 minutos
> 5 Hilos = 64 minutos
> 
> Algo hacer Harbour que esta penalizando la indexación, o el código
> expuesto, tiene alguna
> cosa que penaliza en exceso.
> 
> Cualquier comentario es bienvenido para mejorar este aspecto.
> 
> //---------------------------------------------- source code
> ----------------------------------------------------------
> /*
>   Example multiThreads index.
>   One thread by table , and one thread by index.
>   2010 Rafa Carmona
> 
>   Thread Main
>        |--------->  table for test.dbf
>        |                        |----> Thread child index fname
>        |                        |
>        |                        |----->Thread child index fcode
> 
>   c:\> ..\..\bin\win\bcc\hbmk2 -mt indexthread -lhbcpage
> -Le:\harbour\trunk\harbour\lib\win\bcc
> 
>   New code, now, if thread dead, new thread create!
> 
> */
> #include "hbclass.ch"
> #include "hbthread.ch"
> #include "common.ch"
> #include "inkey.ch"
> #include "FileIO.ch"
> 
> REQUEST HB_CODEPAGE_ES850, HB_CODEPAGE_ES850C
> REQUEST HB_LANG_ES
> 
> static N_THREADS := 5
> 
> STATIC nTecla
> STATIC s_num_procesos
> STATIC s_aLineas := {  }
> static s_hMutex
> static s_hHandle_File
> static s_nCount_Errores := 0
> 
> /*
>   Main( nHilos ) - entry point of the multithreaded reindexing test.
>
>   nHilos : optional command-line argument with the maximum number of
>            table threads allowed to run at the same time. When it is
>            omitted the file-level default N_THREADS is kept.
>
>   Starts one aCreateIndexe() thread per existing DBF, never running
>   more than N_THREADS of them at once, then waits for all threads
>   and shows the elapsed time in minutes. ESC stops launching new
>   tables.
> */
> proc Main( nHilos )
>    Local nSeconds
>    Local cDbf
>    Local aDbfs, aNtxs, aKeys, aFor
>    Local nLen_Table, nPosTable
>    Local nPos_Column := 1, g
>    Local nRunning
> 
>    DEFAULT nHilos TO 0
> 
>    if !empty( nHilos )
>       // Command-line arguments arrive as strings. Never accept fewer
>       // than one thread: with N_THREADS = 0 the scheduling loop below
>       // would wait forever for a slot that can never become free.
>       N_THREADS := Max( 1, Int( iif( ValType( nHilos ) == "C", ;
>                                      Val( nHilos ), nHilos ) ) )
>    endif
> 
>    // HB_SetCodePage( "ES850" ) in xHarbour
>    set( _SET_CODEPAGE, "ES850C" )
> 
>    HB_LANGSELECT('ES')
>    Set( _SET_LANGUAGE, "ES" )
> 
>    setmode( 25,130 )
>    cls
> 
>    @01,0 SAY padc( hb_ansitooem( "  Indexación multihilo. Rafa Carmona" ), 80 ) ;
>          COLOR "N*/W*"
>    @23,0 SAY padc( "Pulse ESC para cancelar." , 80) COLOR "R+/N"
> 
>    // ONLY for TEST: many DBFs with millions of records are needed.
>    // My test is over 200 dbfs, with 12GB total size, without NTX.
>    aDbfs := { "test", "test2" }        // DBF tables to index
> 
>    aNtxs := { { "fname", "fcode" },;   // index files for test
>               { "fName2" } }           // index files for test2
> 
>    aKeys := { { "name", "code" },;
>               { "dtos(fecha)+str(code)" } }   // key expressions
> 
>    aFor  := { { "", "" }, { "" } }     // FOR conditions, "" = none
> 
>    nLen_Table := len( aDbfs )
>    nPosTable  := 1
>    nSeconds   := Seconds()
>    s_num_procesos := 0
>    s_hMutex := hb_mutexCreate()
>    s_hHandle_File := FCreate( "indexpms.log" )
> 
>    // One screen column per thread slot, 16 characters apart
>    for g := 1 to N_THREADS
>       aadd( s_aLineas, nPos_Column )
>       nPos_Column += 16
>    next
> 
>    while nPosTable <= nLen_Table
> 
>       if ( nTecla := inkey() ) = K_ESC
>          exit
>       endif
> 
>       // Read the shared counter under the mutex that guards its updates
>       hb_mutexLock( s_hMutex )
>       nRunning := s_num_procesos
>       hb_mutexUnLock( s_hMutex )
> 
>       if nRunning >= N_THREADS
>          // No table thread has finished yet. Sleep instead of spinning,
>          // so the main thread does not keep a CPU core busy while the
>          // indexing threads are being timed.
>          hb_idleSleep( 0.05 )
>          loop
>       endif
> 
>       cDbf := aDbfs[ nPosTable ]
> 
>       if file( cDbf+".dbf" )
>          hb_mutexLock( s_hMutex )
>          s_num_procesos++
>          hb_mutexUnLock( s_hMutex )
>          hb_threadStart( @aCreateIndexe(), cDbf, aNtxs[ nPosTable ], ;
>                          aKeys[ nPosTable ], aFor[ nPosTable ] )
>       endif
> 
>       nPosTable++
>    enddo
> 
>    @23,1 SAY "Espere, terminado reindexaciones pendientes..." + space( 50 ) ;
>          COLOR "R*/N"
>    hb_threadWaitForAll() // Wait for the last running threads
>    @23,1 SAY "Proceso de indexacion terminado..." + ;
>              Str( (Seconds() - nSeconds) /60 ) + space( 50 ) COLOR "W+/N"
> 
>    FClose( s_hHandle_File )
> 
> return
> 
> 
> /*
>   aCreateIndexe( cFile, aNtx, aExpr, aFor ) - thread body for one table.
>
>   cFile : table name, opened with USE
>   aNtx  : array of index file names (without the ".ntx" extension)
>   aExpr : array of key expressions, parallel to aNtx
>   aFor  : array of FOR conditions, parallel to aNtx ( "" = none )
>
>   Opens the table, detaches its work area, claims one screen column
>   from s_aLineas, starts one crea() thread per index and joins them,
>   then closes the table, gives the column back and decrements
>   s_num_procesos.
>
>   Returns .T. when finished, NIL when the table could not be opened
>   (the error is counted, shown on row 22 and written to the log).
> */
> function aCreateIndexe( cFile, aNtx, aExpr, aFor )
>    Local cFileNtx, cExpr, cFor
>    Local aThreads := {}
>    Local cAlias
>    Local nSlot := 0, nPosLinea := 0, nPos, x
> 
>    use ( cFile )
>    if neterr()
>       hb_mutexLock( s_hMutex )
>       s_num_procesos--
>       s_nCount_Errores++
>       hb_mutexUnLock( s_hMutex )
>       @22,1 SAY "[ "+ alltrim( str( s_nCount_Errores ))+ ;
>                 " ] Error de apertura en el fichero : "+ cFile + space( 10 )
>       fwrite( s_hHandle_File, "Error de apertura en el fichero : "+ ;
>               cFile + Hb_OsNewline() )
>       return nil
>    endif
> 
>    cAlias := alias()
>    hb_dbDetach( cAlias )  // Release the alias so child threads can request it
> 
>    // Claim the first free screen column. s_aLineas is shared by all
>    // table threads, so the scan and the update are done under the
>    // mutex; otherwise two threads could take the same column.
>    hb_mutexLock( s_hMutex )
>    for x := 1 to len( s_aLineas )
>       if !empty( s_aLineas[ x ] )
>          nSlot     := x
>          nPosLinea := s_aLineas[ x ]
>          s_aLineas[ x ] := 0   // 0 marks the column as in use
>          exit
>       endif
>    next
>    hb_mutexUnLock( s_hMutex )
> 
>    @ 2, nPosLinea CLEAR TO 15, nPosLinea + 14 // Clear this thread's box
>    hb_dispOutAt( 2, nPosLinea , cFile )
> 
>    for each cFileNtx in aNtx
>       nPos  := cFileNtx:__enumindex
>       cExpr := aExpr[ nPos ]
>       cFor  := aFor[ nPos ]
>       // The old index file is deleted first to save disk space
>       delete file ( cFileNtx +".ntx")
>       aadd( aThreads, hb_threadStart( @crea(), cAlias, cExpr, ;
>                                       cFileNtx, cFor, nPos, nPosLinea ) )
>       if ( nTecla := inkey() ) = K_ESC  // static variable is not protected
>          exit
>       endif
>    next
> 
>    // Wait for the child threads to finish
>    aEval( aThreads, { |pThread| hb_threadJoin( pThread ) } )
> 
>    hb_dbRequest( cAlias, , , .T.)  // Take the alias back
>    close
> 
>    // Give the screen column back and free the thread slot in one step.
>    // nSlot stays 0 when no column was free, which avoids indexing
>    // s_aLineas past its end.
>    hb_mutexLock( s_hMutex )
>    if nSlot > 0
>       s_aLineas[ nSlot ] := nPosLinea
>    endif
>    s_num_procesos--
>    hb_mutexUnLock( s_hMutex )
> 
> RETURN .T.
> 
> /*
>   crea( cAlias, cExpr, cFileNtx, cFor, nPos, nPosLinea ) - thread body
>   that builds one index file.
>
>   cAlias    : alias of the detached work area to request
>   cExpr     : key expression (macro-compiled)
>   cFileNtx  : index file name (macro-expanded in the TO clause)
>   cFor      : FOR condition, "" for none
>   nPos      : position of this index in the table's index list; the
>               progress text is written on screen row nPos + 3
>   nPosLinea : screen column of the owning table thread
>
>   NOTE(review): the work area is requested with the wait flag set and
>   only detached at the end, so children of the same table appear to
>   run one after another rather than in parallel - confirm.
> */
> proc crea( cAlias, cExpr, cFileNtx, cFor, nPos, nPosLinea )
>    Local nContador := 1
>    Local nStep
> 
>    // Take the alias; nothing can be indexed without the work area
>    if !hb_dbRequest( cAlias, , , .T. )
>       return
>    endif
> 
>    // Progress interval: about 1% of the table, but never 0. Tables
>    // with fewer than 100 records would otherwise pass EVERY 0 and the
>    // displayed counter would never advance.
>    nStep := Max( 1, Int( LastRec() / 100 ) )
> 
>    if empty( cFor )
>       INDEX ON &(cExpr) TO &(cFileNtx) ;
>             EVAL {|| hb_dispOutAt( nPos +3, nPosLinea, ;
>                         padr( cFileNtx,8 )+ "-"+ ;
>                         alltrim( hb_valtostr( nContador) ), "GR+/N" ), ;
>                      nContador += nStep , .T. } ;
>             EVERY nStep
>       // Alternative progress line that also shows the key expression:
>       //   hb_dispOutAt( nPos +2, nPosLinea, padr( cFileNtx,8 )+ "-"+
>       //                 padr( cExpr,20)+ " # " +
>       //                 alltrim( hb_valtostr( nContador) ), "GR+/N" )
>    else
>       INDEX ON &(cExpr) TO &(cFileNtx) FOR &(cFor) ;
>             EVAL {|| hb_dispOutAt( nPos +3, nPosLinea, ;
>                         padr( cFileNtx,8 )+ "-"+ ;
>                         alltrim( hb_valtostr( nContador) ), "GR+/N" ), ;
>                      nContador += nStep , .T. } ;
>             EVERY nStep
>    endif
>    hb_dbDetach( cAlias )          // Release the alias
> 
> return
> _______________________________________________
> Harbour mailing list (attachment size limit: 40KB)
> Harbour@harbour-project.org
> http://lists.harbour-project.org/mailman/listinfo/harbour

_______________________________________________
Harbour mailing list (attachment size limit: 40KB)
Harbour@harbour-project.org
http://lists.harbour-project.org/mailman/listinfo/harbour

Reply via email to