Improve Python + Influxdb import performance
Hello World The following script is an extract from https://github.com/RittmanMead/obi-metrics-agent/blob/master/obi-metrics-agent.py <> import calendar, time import sys import getopt print '---' # Check the arguments to this script are as expected. # argv[0] is script name. argLen = len(sys.argv) if argLen -1 < 2: print "ERROR: got ", argLen -1, " args, must be at least two." print '$FMW_HOME/oracle_common/common/bin/wlst.sh obi-metrics-agent.py [] [] [] [] [targetDB influx db>' exit() outputFormat='CSV' url='t3://localhost:7001' targetHost='localhost' targetDB='obi' targetPort='8086' try: wls_user = sys.argv[1] wls_pw = sys.argv[2] url = sys.argv[3] outputFormat=sys.argv[4] targetHost=sys.argv[5] targetPort=sys.argv[6] targetDB=sys.argv[7] except: print '' print wls_user, wls_pw,url, outputFormat,targetHost,targetPort,targetDB now_epoch = calendar.timegm(time.gmtime())*1000 if outputFormat=='InfluxDB': import httplib influx_msgs='' connect(wls_user,wls_pw,url) results = displayMetricTables('Oracle_BI*','dms_cProcessInfo') for table in results: tableName = table.get('Table') rows = table.get('Rows') rowCollection = rows.values() iter = rowCollection.iterator() while iter.hasNext(): row = iter.next() rowType = row.getCompositeType() keys = rowType.keySet() keyIter = keys.iterator() inst_name= row.get('Name').replace(' ','-') try: server= row.get('Servername').replace(' ','-').replace('/','_') except: try: server= row.get('ServerName').replace(' ','-').replace('/','_') except: server='unknown' try: host= row.get('Host').replace(' ','-') except: host='' while keyIter.hasNext(): columnName = keyIter.next() value = row.get(columnName ) if columnName.find('.value')>0: metric_name=columnName.replace('.value','') if value is not None: if value != 0: if outputFormat=='InfluxDB': influx_msg= ('%s,server=%s,host=%s,metric_group=%s,metric_instance=%s value=%s %s') % (metric_name,server,host,tableName,inst_name, value,now_epoch*100) influx_msgs+='\n%s' % influx_msg conn = 
httplib.HTTPConnection('%s:%s' % (targetHost,targetPort)) ## TODO pretty sure should be urlencoding this ... a=conn.request("POST", ("/write?db=%s" % targetDB), influx_msg) r=conn.getresponse() <> It currently takes about 3 minutes to execute completely, and I am looking for a way to make it run faster. Data alignment (InfluxDB line protocol) and data loading are done together, and that takes up most of the time. InfluxDB is currently loading data at around 3 points/second. Is there any way to align the data separately, store it, and load it as a batch? I feel that would help improve performance. Please let me know if you have any pointers. I can send the data sheet if required. Thanks, P -- https://mail.python.org/mailman/listinfo/python-list
Re: Improve Python + Influxdb import performance
On Monday, April 3, 2017 at 9:52:38 PM UTC+5:30, INADA Naoki wrote: > You can reuse connection, instead of creating for each request. (HTTP > keep-alive). > > On Tue, Apr 4, 2017 at 1:11 AM, Prathamesh > wrote: > > Hello World > > > > The following script is an extract from > > > > https://github.com/RittmanMead/obi-metrics-agent/blob/master/obi-metrics-agent.py > > > > <> > > > > import calendar, time > > import sys > > import getopt > > > > print '---' > > > > # Check the arguments to this script are as expected. > > # argv[0] is script name. > > argLen = len(sys.argv) > > if argLen -1 < 2: > > print "ERROR: got ", argLen -1, " args, must be at least two." > > print '$FMW_HOME/oracle_common/common/bin/wlst.sh obi-metrics-agent.py > > [] [] > > [] [] [targetDB influx db>' > > exit() > > > > outputFormat='CSV' > > url='t3://localhost:7001' > > targetHost='localhost' > > targetDB='obi' > > targetPort='8086' > > > > try: > > wls_user = sys.argv[1] > > wls_pw = sys.argv[2] > > url = sys.argv[3] > > outputFormat=sys.argv[4] > > targetHost=sys.argv[5] > > targetPort=sys.argv[6] > > targetDB=sys.argv[7] > > except: > > print '' > > > > print wls_user, wls_pw,url, outputFormat,targetHost,targetPort,targetDB > > > > now_epoch = calendar.timegm(time.gmtime())*1000 > > > > if outputFormat=='InfluxDB': > > import httplib > > influx_msgs='' > > > > connect(wls_user,wls_pw,url) > > results = displayMetricTables('Oracle_BI*','dms_cProcessInfo') > > for table in results: > > tableName = table.get('Table') > > rows = table.get('Rows') > > rowCollection = rows.values() > > iter = rowCollection.iterator() > > while iter.hasNext(): > > row = iter.next() > > rowType = row.getCompositeType() > > keys = rowType.keySet() > > keyIter = keys.iterator() > > inst_name= row.get('Name').replace(' ','-') > > try: > > server= row.get('Servername').replace(' ','-').replace('/','_') > > except: > > try: > > server= row.get('ServerName').replace(' > > ','-').replace('/','_') > > except: > > 
server='unknown' > > try: > > host= row.get('Host').replace(' ','-') > > except: > > host='' > > while keyIter.hasNext(): > > columnName = keyIter.next() > > value = row.get(columnName ) > > if columnName.find('.value')>0: > > metric_name=columnName.replace('.value','') > > if value is not None: > > if value != 0: > > if outputFormat=='InfluxDB': > > influx_msg= > > ('%s,server=%s,host=%s,metric_group=%s,metric_instance=%s value=%s %s') % > > (metric_name,server,host,tableName,inst_name, value,now_epoch*100) > > influx_msgs+='\n%s' % influx_msg > > conn = httplib.HTTPConnection('%s:%s' % > > (targetHost,targetPort)) > > ## TODO pretty sure should be urlencoding this > > ... > > a=conn.request("POST", ("/write?db=%s" % > > targetDB), influx_msg) > > r=conn.getresponse() > > > > <> > > > > It currently takes about 3 minutes to execute completely and I was thinking > > of a way to make it run faster > > > > Data alignment (Influx line protocol) & data loading - done together takes > > up most of the time > > > > Influxdb is currently loading data at around 3 points/second > > > > Any way to align the data separately, store it and load it as a batch? > > > > I feel that would help improve performance > > > > Please let me know if you have any pointers > > I can send the data sheet if required > > > > Thanks P > > -- > > https://mail.python.org/mailman/listinfo/python-list How do I do that? -- https://mail.python.org/mailman/listinfo/python-list
Re: Improve Python + Influxdb import performance
Hi Inada, Thank you for your response. WebLogic works on Jython 2.2.1, and I think the Python requests module requires Python >= 2.6. Please correct me if I'm wrong, or let me know if there is another way to get this to work. Thanks, P On Monday, April 3, 2017 at 9:52:38 PM UTC+5:30, INADA Naoki wrote: > You can reuse connection, instead of creating for each request. (HTTP > keep-alive). > > On Tue, Apr 4, 2017 at 1:11 AM, Prathamesh > wrote: > > Hello World > > > > The following script is an extract from > > > > https://github.com/RittmanMead/obi-metrics-agent/blob/master/obi-metrics-agent.py > > > > <> > > > > import calendar, time > > import sys > > import getopt > > > > print '---' > > > > # Check the arguments to this script are as expected. > > # argv[0] is script name. > > argLen = len(sys.argv) > > if argLen -1 < 2: > > print "ERROR: got ", argLen -1, " args, must be at least two." > > print '$FMW_HOME/oracle_common/common/bin/wlst.sh obi-metrics-agent.py > > [] [] > > [] [] [targetDB influx db>' > > exit() > > > > outputFormat='CSV' > > url='t3://localhost:7001' > > targetHost='localhost' > > targetDB='obi' > > targetPort='8086' > > > > try: > > wls_user = sys.argv[1] > > wls_pw = sys.argv[2] > > url = sys.argv[3] > > outputFormat=sys.argv[4] > > targetHost=sys.argv[5] > > targetPort=sys.argv[6] > > targetDB=sys.argv[7] > > except: > > print '' > > > > print wls_user, wls_pw,url, outputFormat,targetHost,targetPort,targetDB > > > > now_epoch = calendar.timegm(time.gmtime())*1000 > > > > if outputFormat=='InfluxDB': > > import httplib > > influx_msgs='' > > > > connect(wls_user,wls_pw,url) > > results = displayMetricTables('Oracle_BI*','dms_cProcessInfo') > > for table in results: > > tableName = table.get('Table') > > rows = table.get('Rows') > > rowCollection = rows.values() > > iter = rowCollection.iterator() > > while iter.hasNext(): > > row = iter.next() > > rowType = row.getCompositeType() > > keys = rowType.keySet() > > keyIter = keys.iterator() > > inst_name= 
row.get('Name').replace(' ','-') > > try: > > server= row.get('Servername').replace(' ','-').replace('/','_') > > except: > > try: > > server= row.get('ServerName').replace(' > > ','-').replace('/','_') > > except: > > server='unknown' > > try: > > host= row.get('Host').replace(' ','-') > > except: > > host='' > > while keyIter.hasNext(): > > columnName = keyIter.next() > > value = row.get(columnName ) > > if columnName.find('.value')>0: > > metric_name=columnName.replace('.value','') > > if value is not None: > > if value != 0: > > if outputFormat=='InfluxDB': > > influx_msg= > > ('%s,server=%s,host=%s,metric_group=%s,metric_instance=%s value=%s %s') % > > (metric_name,server,host,tableName,inst_name, value,now_epoch*100) > > influx_msgs+='\n%s' % influx_msg > > conn = httplib.HTTPConnection('%s:%s' % > > (targetHost,targetPort)) > > ## TODO pretty sure should be urlencoding this > > ... > > a=conn.request("POST", ("/write?db=%s" % > > targetDB), influx_msg) > > r=conn.getresponse() > > > > <> > > > > It currently takes about 3 minutes to execute completely and I was thinking > > of a way to make it run faster > > > > Data alignment (Influx line protocol) & data loading - done together takes > > up most of the time > > > > Influxdb is currently loading data at around 3 points/second
Re: Improve Python + Influxdb import performance
Hi, I've installed Jython 2.7 and was able to get WebLogic running as a Jython 2.7 module following https://technology.amis.nl/2015/10/04/how-to-use-wlst-as-a-jython-2-7-module/ I got the requests module 2.7 installed as well, but I am facing an issue related to SSL certificates when invoking it. To stop those errors I think you need urllib2/urllib3, and that has dependencies, one of which is cryptography - which is a real pain to install. Any pointers on this, or an easier way to install it? I'm running Windows 2008R2. Also, my pip install doesn't work; I get a geteuid error. Please let me know. Thanks, P On Tuesday, April 4, 2017 at 6:51:28 PM UTC+5:30, Prathamesh wrote: > Hi Inada > > Thank you for your response > Weblogic works on Jython 2.2.1 > and I think the Python requests module requires >= python 2.6 > > Please correct me if I'm wrong > or if there is another way to get this to work > > Thanks > P > > On Monday, April 3, 2017 at 9:52:38 PM UTC+5:30, INADA Naoki wrote: > > You can reuse connection, instead of creating for each request. (HTTP > > keep-alive). > > > > On Tue, Apr 4, 2017 at 1:11 AM, Prathamesh > > wrote: > > > Hello World > > > > > > The following script is an extract from > > > > > > https://github.com/RittmanMead/obi-metrics-agent/blob/master/obi-metrics-agent.py > > > > > > <> > > > > > > import calendar, time > > > import sys > > > import getopt > > > > > > print '---' > > > > > > # Check the arguments to this script are as expected. > > > # argv[0] is script name. > > > argLen = len(sys.argv) > > > if argLen -1 < 2: > > > print "ERROR: got ", argLen -1, " args, must be at least two." 
> > > print '$FMW_HOME/oracle_common/common/bin/wlst.sh > > > obi-metrics-agent.py > > > [] [] [] [ > > port>] [targetDB influx db>' > > > exit() > > > > > > outputFormat='CSV' > > > url='t3://localhost:7001' > > > targetHost='localhost' > > > targetDB='obi' > > > targetPort='8086' > > > > > > try: > > > wls_user = sys.argv[1] > > > wls_pw = sys.argv[2] > > > url = sys.argv[3] > > > outputFormat=sys.argv[4] > > > targetHost=sys.argv[5] > > > targetPort=sys.argv[6] > > > targetDB=sys.argv[7] > > > except: > > > print '' > > > > > > print wls_user, wls_pw,url, outputFormat,targetHost,targetPort,targetDB > > > > > > now_epoch = calendar.timegm(time.gmtime())*1000 > > > > > > if outputFormat=='InfluxDB': > > > import httplib > > > influx_msgs='' > > > > > > connect(wls_user,wls_pw,url) > > > results = displayMetricTables('Oracle_BI*','dms_cProcessInfo') > > > for table in results: > > > tableName = table.get('Table') > > > rows = table.get('Rows') > > > rowCollection = rows.values() > > > iter = rowCollection.iterator() > > > while iter.hasNext(): > > > row = iter.next() > > > rowType = row.getCompositeType() > > > keys = rowType.keySet() > > > keyIter = keys.iterator() > > > inst_name= row.get('Name').replace(' ','-') > > > try: > > > server= row.get('Servername').replace(' > > > ','-').replace('/','_') > > > except: > > > try: > > > server= row.get('ServerName').replace(' > > > ','-').replace('/','_') > > > except: > > > server='unknown' > > > try: > > > host= row.get('Host').replace(' ','-') > > > except: > > > host='' > > > while keyIter.hasNext(): > > > columnName = keyIter.next() > > > value = row.get(columnName ) > > > if columnName.find('.value')>0: > > > metric_name=columnName.replace('.value','') > > > if value is not None: > > >