Indexing is disk-intensive rather than CPU-intensive, meaning that disk throughput, access times and file I/O performance are the limiting factors.
For this reason, splitting it across multiple threads may only help if you have a very fast disk, like an SSD, but even that needs testing to confirm. I didn't test it, but I imagine multiple threads can even have a negative effect, since there is much more disk and cache thrashing, and the written indexes will also be much more fragmented on most file systems. It's almost like trying to duplicate files on the same disk using multiple threads. Viktor On 2010 May 10, at 11:20, rafa wrote: > Hello, > I'm trying to do a re indexing using threads, but according to tests > under Windows Vista, > Quarda Intel Core, 4 Gig RAM, the tests are not entirely satisfactory; > > The basic idea was to determine the number of threads that want to > implement, > N_THREADS, which is passed by parameter, for testing purposes. > > Well, times; > 1 thread = 29 minutes THE WINNER! > 4 threads = 93 minutes WoW! > 7 threads = 68 minutes > 5 Threads = 64 minutes > > Something to do this by penalizing Harbour indexing, or the code above, > you have > thing excessively penalized. > > ---------------- SPANISH ------------------------------------------- > Hola, > > Estoy intentando hacer una re indexación usando hilos, pero según > pruebas bajo Windows Vista, > Intel Quard Core, 4 Gigas RAM, los test no son del todo satisfactorios; > > La idea básica es determinar la cantidad de hilos que queremos poner en > marcha, > N_THREADS, que es pasado por parámetro, para poder realizar pruebas. > > Pues bien, los tiempos ; > 1 Hilo = 29 minutos THE WINNER! > 4 Hilos = 93 minutos WoW!! > 7 hilos = 68 minutos > 5 Hilos = 64 minutos > > Algo hacer Harbour que esta penalizando la indexación, o el código > expuesto, tiene alguna > cosa que penaliza en exceso. > > Cualquier comentario es bienvenido para mejorar este aspecto. > > //---------------------------------------------- source code > ---------------------------------------------------------- > /* > Example multiThreads index. 
> One thread by table , and one thread by index. > 2010 Rafa Carmona > > Thread Main > |---------> table for test.dbf > | |----> Thread child index fname > | | > | |----->Thread child index fcode > > c:\> ..\..\bin\win\bcc\hbmk2 -mt indexthread -lhbcpage > -Le:\harbour\trunk\harbour\lib\win\bcc > > New code, now, if thread dead, new thread create! > > */ > #include "hbclass.ch" > #include "hbthread.ch" > #include "common.ch" > #include "inkey.ch" > #include "FileIO.ch" > > REQUEST HB_CODEPAGE_ES850, HB_CODEPAGE_ES850C > REQUEST HB_LANG_ES > > static N_THREADS := 5 > > STATIC nTecla > STATIC s_num_procesos > STATIC s_aLineas := { } > static s_hMutex > static s_hHandle_File > static s_nCount_Errores := 0 > > proc Main( nHilos ) > Local nSeconds > Local cDbf, lProcesa := .F. > Local aFicheros, x > Local aDbfs, aNtxs, aKeys , aDesc, aFor, aSel > Local aThreads := {} > Local nProceso := 0, nLinea, lSalir := .F., nIndex > Local nLen_Table > Local nPosTable, cCadena, cLine, aTokens, i > Local nPos_Column := 1, g > > > DEFAULT nHilos TO 0 > > if !empty( nHilos ) > N_THREADS := val( nHilos ) > endif > > //HB_SetCodePage( "ES850" ) En xHarbour > set( _SET_CODEPAGE, "ES850C" ) > > HB_LANGSELECT('ES') > Set( _SET_LANGUAGE, "ES" ) > > > setmode( 25,130 ) > cls > > @01,0 SAY padc( hb_ansitooem( " Indexación multihilo. Rafa Carmona" > ), 80 )COLOR "N*/W*" > @23,0 SAY padc( "Pulse ESC para cancelar." , 80) COLOR "R+/N" > > // ONLY for TEST, if necesary many many DBF , with millions records > for test. > // My test is over 200 dbfs, with 12GB total size, without NTX. 
> aDbfs := { "test", "test2" } // Arrays files dbf > > aNtxs := { { "fname", "fcode" },; // files index for test > { "fName2" } } // files index for test2 > > aKeys := { { "name", "code" },; > { "dtos(fecha)+str(code)" } } // Expresions > > aFor := { { "", ""}, {""} } > > > nLen_Table := len( aDbfs ) > nPosTable := 1 > nSeconds := Seconds() > s_num_procesos := 0 > s_hMutex := hb_mutexCreate() > s_hHandle_File := FCreate( "indexpms.log" ) > > for g := 1 to N_THREADS // Posicion en la columa por cada numero de > hilo > aadd( s_aLineas, nPos_Column ) > nPos_Column += 16 > next > > > while nPosTable <= nLen_Table > > if ( nTecla := inkey() ) = K_ESC > exit > endif > > if N_THREADS = s_num_procesos // No se ha muerto ningun proceso > loop > endif > > cDbf := aDbfs[ nPosTable ] > > > if file( cDbf+".dbf" ) > hb_mutexLock( s_hMutex ) > s_num_procesos++ > hb_mutexUnLock( s_hMutex ) > hb_threadStart( @aCreateIndexe(), cDbf, aNtxs[ nPosTable ], > aKeys[ nPosTable ], aFor[ nPosTable ] ) > endif > > nPosTable++ > end while > > @23,1 SAY "Espere, terminado reindexaciones pendientes..." + space( 50 > ) COLOR "R*/N" > hb_threadWaitForAll() // Esperamos a los ultimos. > @23,1 SAY "Proceso de indexacion terminado..." + Str( (Seconds() - > nSeconds) /60 ) + space( 50 ) COLOR "W+/N" > > FClose( s_hHandle_File ) > > return > > > function aCreateIndexe( cFile, aNtx, aExpr, aFor ) > Local nContador := 1 > Local cFileNtx, cExpr > Local aThreads := {} > Local cAlias, cFor > Local nP, x, oError, lreturn := .f. 
> > use ( cFile ) > if neterr() > hb_mutexLock( s_hMutex ) > s_num_procesos-- > s_nCount_Errores++ > hb_mutexUnLock( s_hMutex ) > @22,1 SAY "[ "+ alltrim( str( s_nCount_Errores ))+" ] Error de > apertura en el fichero : "+ cFile + space( 10 ) > fwrite( s_hHandle_File, "Error de apertura en el fichero : "+ > cFile + Hb_OsNewline() ) > return nil > endif > > cAlias := alias() > hb_dbDetach( cAlias ) // Libero el alias > > for x := 1 to len( s_aLineas ) > nPosLinea := s_aLineas[ x ] > if !empty( nPosLinea ) > s_aLineas[x] := 0 // Quita del array esa linea disponible > para pintar > exit > endif > next > > @ 2, nPosLinea CLEAR TO 15, nPosLinea + 14 // Limpiamos cuadrado > hb_dispOutAt( 2, nPosLinea , cFile ) > > for each cFileNtx in aNtx > cExpr := aExpr[ cFileNtx:__enumindex ] > cFor := aFor[ cFileNtx:__enumindex ] > nPos := cFileNtx:__enumindex > delete file ( cFileNtx +".ntx") // Se borra el fichero, por > ahorro de espacio en disco. > aadd( aThreads, hb_threadStart( @crea(), cAlias,cExpr, > cFileNtx, cFor, nPos, nPosLinea ) ) > if ( nTecla := inkey() ) = K_ESC // No protegemos variable > static > exit > endif > next > > aEval( aThreads, { |x| hb_threadJoin( x ) } ) // Espera que > termine los hilos hijos > > hb_dbRequest( cAlias, , , .T.) // Restaura el alias > close > > // Vuelve a colocar la linea como disponible para pintar > s_aLineas[x] := nPosLinea > hb_mutexLock( s_hMutex ) > s_num_procesos-- > hb_mutexUnLock( s_hMutex ) > > RETURN .T. > > proc crea( cAlias, cExpr, cFileNtx, cFor, nPos, nPosLinea ) > Local nContador := 1 > > hb_dbRequest( cAlias, , , .T.) // Restaura el alias > if empty( cfor ) > INDEX ON &(cExpr) TO &(cFileNtx) ; > EVAL {|| hb_dispOutAt( nPos +3, nPosLinea, padr( > cFileNtx,8 )+ "-"+alltrim( hb_valtostr( nContador) ), "GR+/N" ), > nContador += INT( LASTREC() / 100 ) , .T. 
} ; > EVERY INT( LASTREC() / 100 ) > // EVAL {|| hb_dispOutAt( nPos +2, nPosLinea, padr( > cFileNtx,8 )+ "-"+ padr( cExpr,20)+ " # " +alltrim( hb_valtostr( > nContador) ), "GR+/N" ), nContador += INT( LASTREC() / 100 ) , .T. } ; > else > INDEX ON &(cExpr) TO &(cFileNtx) FOR &(cFor) ; > EVAL {|| hb_dispOutAt( nPos +3, nPosLinea, padr( > cFileNtx,8 )+ "-"+alltrim( hb_valtostr( nContador) ), "GR+/N" ), > nContador += INT( LASTREC() / 100 ) , .T. } ; > EVERY INT( LASTREC() / 100 ) > endif > hb_dbDetach( cAlias ) // Libera el alias > > > return > _______________________________________________ > Harbour mailing list (attachment size limit: 40KB) > Harbour@harbour-project.org > http://lists.harbour-project.org/mailman/listinfo/harbour _______________________________________________ Harbour mailing list (attachment size limit: 40KB) Harbour@harbour-project.org http://lists.harbour-project.org/mailman/listinfo/harbour