Hi,
here is a function which is about 8 x faster than the one described in the 
PostgreSQL SQL Tricks
( 
http://postgres.cz/wiki/PostgreSQL_SQL_Tricks#Function_for_decoding_of_url_code 
)

The idea is to handle each encoded/not_encoded parts in bulk rather than 
spliting on each character.

urldecode_arr:
Seq Scan on lt_referrer  (actual time=1.966..17623.979 rows=65717 loops=1)

urldecode:
Seq Scan on lt_referrer  (actual time=4.846..144445.292 rows=65717 loops=1)

regards,

Marc Mamin



CREATE OR REPLACE FUNCTION urldecode_arr(url text)
  RETURNS text AS
$BODY$
DECLARE ret text;

BEGIN
 BEGIN

    WITH STR AS (
      SELECT
      
      -- array with all non encoded parts, prepend with '' when the string 
start is encoded
      case when $1 ~ '^%[0-9a-fA-F][0-9a-fA-F]' 
           then array[''] 
           end 
      || regexp_split_to_array ($1,'(%[0-9a-fA-F][0-9a-fA-F])+', 'i') plain,
      
      -- array with all encoded parts
      array(select (regexp_matches ($1,'((?:%[0-9a-fA-F][0-9a-fA-F])+)', 
'gi'))[1]) encoded
    )
    SELECT  string_agg(plain[i] || coalesce( 
convert_from(decode(replace(encoded[i], '%',''), 'hex'), 'utf8'),''),'')
    FROM STR, 
      (SELECT  generate_series(1, array_upper(encoded,1)+2) i FROM STR)blah

    INTO ret;

  EXCEPTION WHEN OTHERS THEN  
    raise notice 'failed: %',url;
    return $1;
  END;   

  RETURN coalesce(ret,$1); -- when the string has no encoding;

END;

$BODY$
  LANGUAGE plpgsql IMMUTABLE STRICT


-- 
Sent via pgsql-general mailing list (pgsql-general@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general

Reply via email to