To find all js/css/image pathnames in an HTML document, I used the regular 
expression at the end of the code snippet below. Are there any shorter 
regular expressions, or more efficient ways, to do this?


import re
translation='''<link rel="stylesheet" type="text/css" href="O8C.css">

<span eid="beetle_e" id="beetle_e" level="0" alpha_id="000003072" name="beetle" 
idm_id="000003072" backup-class="b" class="entry">

  <span level="1" class="h-g">

    <span level="2" class="top-g">

      <span level="3" class="h">bee·tle™</span>

      <span type="h_full_" level="3" class="ei-g">

        <span class="z_ei-g">/</span>

        <span file="{gb}/b/be/bee/beetle#_gb_1.spx" level="4" wd="beetle" 
recdate="070514" class="phon-gb">ˈbiːtl</span>

        <a type="sound" topic="b/bee/beetl/beetle__gb_1.spx" resource="uk_pron" 
backup-class="Media" class="fayin" href="sound://uk/beetle__gb_1.spx"><img 
src="uk_pron.png" class="fayin"/></a>

        <span class="z">;

          <span class="z_phon-us">NAmE</span>

        </span>

        <span file="{gb}/b/be/bee/beetle#_us_1.spx" sup="y" level="4" 
wd="beetle" recdate="070514" class="phon-us">ˈbiːtl</span>

        <a type="sound" topic="b/bee/beetl/beetle__us_1.spx" resource="us_pron" 
backup-class="Media" class="fayin" href="sound://us/beetle__us_1.spx"><img 
src="us_pron.png" class="fayin"/></a>

        <span class="z_ei-g">/</span>

      </span>

      <span level="3" display="inline" class="pos-g">

        <span topic="beetle_e" bookmark="beetle_pos_n" class="Ref">

          <a href="entry://#beetle_pos_n" level="4" pos="n" 
backup-class="pos">noun</a>

        </span>

        <span class="z">,</span>

        <span topic="beetle_e" bookmark="beetle_pos_v" class="Ref">

          <a href="entry://#beetle_pos_v" level="4" pos="v" 
backup-class="pos">verb</a>

        </span>

      </span>

    </span>

    <span level="2" class="infl">

      <span level="3" class="inflection">beetle</span>

      <span level="3" class="inflection">beetles</span>

      <span level="3" class="inflection">beetled</span>

      <span level="3" class="inflection">beetling</span>

    </span>

  </span>

  <a name="beetle_pos_n"></a>

  <span eid="beetle_pos_n" id="beetle_pos_n" level="1" class="p-g">

    <span level="2" class="block-g">

      <span level="3" class="pos-g">

        <span level="4" pos="n" class="pos">noun</span>

      </span>

      <img src="/pic/insects_comp.jpg" alt="/pic/insects_comp.jpg" height="620" 
width="720" style="display:none;" 
onclick="this.style.display=&apos;none&apos;;this.nextSibling.nextSibling.style.display=&apos;block&apos;;"/>

      <img type="image" topic="insects_comp.htm" thumb_resource="thumb" 
resource="pic" thumb="beetle.jpg" class="Media" backup-height="620" 
backup-width="720" src="/thumb/beetle.jpg" alt="/thumb/beetle.jpg" 
onclick="this.style.display=&apos;none&apos;;this.previousSibling.previousSibling.style.display=&apos;block&apos;;"/>

      <span class="clear"></span>

    </span>

    <span eid="beetle_ng_1" id="beetle_ng_1" level="2" n="1" class="n-g">

      <span class="z_n">1</span>

      <span level="3" class="def-g">

        <span status="6" level="4" tranidoupc="1" class="d">an insect, often 
large and black, with a hard case on its back, covering its wings. There are 
several types of

          <span level="5" class="dh">beetle.</span>

          <span localeuidoupc="201" status="6" level="5" class="chn">甲虫</span>

        </span>

      </span>

      <span xt="see" level="3" class="xr-g">

        <span class="symbols-xrsym">☞</span>see also

        <span eid="beetle_xr_1" id="beetle_xr_1" xt="see" 
href="deathwatchbeetle_e" level="4" pos="n" class="xr">

          <span topic="deathwatchbeetle_e" fk="XXX" class="Ref">

            <span level="5" class="xh">

              <a href="entry://death-watch beetle">death-watch beetle</a>

            </span>

          </span>

        </span>

      </span>

    </span>

    <span eid="beetle_ng_2" id="beetle_ng_2" new="seven" level="2" n="2" 
enc="y" class="n-g">

      <span class="z_n">2</span>

      <span level="3" class="alt">Beetle</span>

      <span level="3" class="vs-g">

        <span class="z">(</span>

        <span level="4" brackets="n" display="inline" class="label-g">

          <span level="5" g="amalso" class="g">NAmE also</span>

        </span>

        <span level="4" class="v">bug</span>

        <span class="z">)</span>

      </span>

      <span level="3" class="def-g">

        <span status="6" level="4" tranidoupc="7" class="d">the English names 
for the original Volkswagen small car with a round shape at the front and the 
back

          <span localeuidoupc="201" status="6" level="5" 
class="chn">“甲壳虫”(英国人用以指称一款圆头圆顶的原大众牌的小汽车)</span>

        </span>

      </span>

    </span>

  </span>

  <a name="beetle_pos_v"></a>

  <span eid="beetle_pos_v" id="beetle_pos_v" level="1" class="p-g">

    <span level="2" class="block-g">

      <span level="3" class="pos-g">

        <span level="4" pos="v" class="pos">verb</span>

      </span>

    </span>

    <span gr="i" level="2" class="gr">

      <span class="z_gr_br">[</span>intransitive

      <span class="z_gr_br">]</span>

    </span>

    <span eid="beetle_cf_1" id="beetle_cf_1" level="2" class="cf">+ 
adv./prep.</span>

    <span level="2" class="def-g">

      <span level="3" display="inline" class="label-g">(

        <span level="4" g="br" class="g">BrE</span>) (

        <span level="4" r="infml" class="r">informal</span>)

      </span>

      <span status="6" level="3" tranidoupc="3" class="d">to move somewhere 
quickly

        <span localeuidoupc="201" status="6" level="4" class="chn">快速移动</span>

      </span>

    </span>

    <span xt="syn" level="2" class="xr-g">

      <span class="symbols-synsym">SYN</span>

      <span eid="beetle_xr_2" id="beetle_xr_2" xt="syn" href="scurry_e" 
level="3" pos="v" class="xr">

        <span topic="scurry_e" fk="XXX" class="Ref">

          <span level="4" class="xh">

            <a href="entry://scurry">scurry</a>

          </span>

        </span>

      </span>

    </span>

    <span eid="beetle_xg_1" id="beetle_xg_1" level="2" class="x-g">

      <span class="symbols-xsym">◆</span>

      <span status="6" record="y" level="3" tranidoupc="4" class="x">I last saw 
him beetling off down the road.</span>

      <span localeuidoupc="201" status="6" level="3" 
class="tx">我上次见到他时,他正快步沿路而去。</span>

    </span>

  </span>

  <span class="pracpron">

    <span class="pron-g">

      <span type="h" class="wd">bee·tle™</span>

      <span type="h_full_" level="3" class="ei-g">

        <span class="z_ei-g">/</span>

        <span file="{gb}/b/be/bee/beetle#_gb_1.spx" level="4" wd="beetle" 
recdate="070514" class="phon-gb">ˈbiːtl</span>

        <a type="sound" topic="b/bee/beetl/beetle__gb_1.spx" resource="uk_pron" 
backup-class="Media" class="fayin" href="sound://uk/beetle__gb_1.spx"><img 
src="uk_pron.png" class="fayin"/></a>

        <span class="z">;

          <span class="z_phon-us">NAmE</span>

        </span>

        <span file="{gb}/b/be/bee/beetle#_us_1.spx" sup="y" level="4" 
wd="beetle" recdate="070514" class="phon-us">ˈbiːtl</span>

        <a type="sound" topic="b/bee/beetl/beetle__us_1.spx" resource="us_pron" 
backup-class="Media" class="fayin" href="sound://us/beetle__us_1.spx"><img 
src="us_pron.png" class="fayin"/></a>

        <span class="z_ei-g">/</span>

      </span>

    </span>

  </span>

</span>
'''
def find_asset_paths(html, extensions=("css", "js", "png", "jpg")):
    """Return the asset paths referenced by href/src attributes in *html*.

    A path is reported when it is the complete quoted value of an ``href``
    or ``src`` attribute and ends in a dot followed by one of *extensions*.
    Paths are returned in document order; duplicates are kept.

    Args:
        html: The HTML text to scan.
        extensions: Iterable of file extensions (without the leading dot)
            to accept.  Pass a tuple or list, not a bare string.

    Returns:
        A list of the matching attribute values, exactly as written in
        *html* (no unescaping or URL normalisation is performed).

    Note:
        This is a regex scan, not an HTML parse: it will also match
        attribute-like text inside comments or scripts.  For untrusted or
        irregular markup, a real parser such as ``html.parser`` is the
        more robust choice.
    """
    if not extensions:
        # An empty alternation would match any value ending in ".".
        return []

    # Escape each extension so characters such as "+" are taken literally.
    ext_group = "|".join(re.escape(ext) for ext in extensions)

    # \b          -> do not match inside longer names such as "xhref".
    # \s*=\s*     -> HTML allows whitespace around the equals sign.
    # "..."|'...' -> accept either quote style; the negated character class
    #                cannot cross the closing quote, so a greedy "+" yields
    #                the same matches as a lazy one.
    # IGNORECASE  -> attribute names and extensions may be upper-case.
    pattern = re.compile(
        r"""\b(?:href|src)\s*=\s*(?:"([^"]+\.(?:{0}))"|'([^']+\.(?:{0}))')""".format(ext_group),
        re.IGNORECASE,
    )

    # findall() yields one (double_quoted, single_quoted) tuple per match;
    # exactly one of the two groups is non-empty.
    return [double_quoted or single_quoted
            for double_quoted, single_quoted in pattern.findall(html)]


print(find_asset_paths(translation))
-- 
https://mail.python.org/mailman/listinfo/python-list

Reply via email to