see the udf
*parse_url_tuple*
SELECT b.*
FROM src LATERAL VIEW parse_url_tuple(fullurl, 'HOST', 'PATH', 'QUERY',
'QUERY:id') b as host, path, query, query_id LIMIT 1;


https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-parse_url_tuple

2016-09-20 11:22 GMT+02:00 Manish Rangari <linuxtricksfordev...@gmail.com>:

> Guys,
>
> I want to get the field of elb logs. A sample elb log is given below and I
> am using below create table definition. It is working fine. I am getting
> what I wanted but now I want the bold part as well. For example eid, tid,
> aid. Can anyone help me how can I match them as well.
>
> NOTE: The position of aid, eid, tid is not fixed and it may change.
>
> 2016-09-16T06:55:19.056871Z testelb 2.1.7.2:52399 192.168.1.5:80 0.000021
> 0.000596 0.00002 200 200 0 43 "GET https://site1.example.com:443/peek?
> *eid=aw123&tid=fskc235n&aid=2ADSFGSDG* HTTP/1.1" "Mozilla/5.0 (Windows NT
> 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85
> Safari/537.36" ECDHE-RSA-AES128-GCM-SHA256 TLSv1.2
>
>
> CREATE TABLE elblog (
> Request_date STRING,
>       ELBName STRING,
>       RequestIP STRING,
>       RequestPort INT,
>       BackendIP STRING,
>       BackendPort INT,
>       RequestProcessingTime DOUBLE,
>       BackendProcessingTime DOUBLE,
>       ClientResponseTime DOUBLE,
>       ELBResponseCode STRING,
>       BackendResponseCode STRING,
>       ReceivedBytes BIGINT,
>       SentBytes BIGINT,
>       RequestVerb STRING,
>       URL STRING,
>       Protocol STRING,
> Useragent STRING,
> ssl_cipher STRING,
> ssl_protocol STRING
> )
> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
> WITH SERDEPROPERTIES (
>           "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^
> ]*):([0-9]*) ([.0-9]*) ([.0-9]*) ([.0-9]*) (-|[0-9]*) (-|[0-9]*) ([-0-9]*)
> ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"(.*)\" (.*) (.*)$"
> )
> STORED AS TEXTFILE;
>

Reply via email to