This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch cleanup/drop-wikinews-importer-component
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit ae856879e4e5b5e91319feb3c4f5e9bb8bcfc313
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Mar 20 22:09:40 2026 +0100

    Drop wikinews-importer component
    - reason: Niche, no tests, no value, stale dependency (bliki-core 2013)
---
 pom.xml                                            |   1 -
 wikinews-importer/bin/converter                    |  21 --
 wikinews-importer/pom.xml                          |  79 -------
 .../FTC_begins_antitrust_inquiry_of_Google.xmi     |  77 ------
 .../Internet_hacking_group_LulzSec_disbands.xmi    |  23 --
 ...a_announces_troop_reductions_in_Afghanistan.xmi |  24 --
 ...i\304\207_said_to_be_too_ill_to_face_trial.xmi" |  28 ---
 wikinews-importer/samples/TypeSystem.xml           | 106 ---------
 .../samples/US_actor_Peter_Falk_dies_aged_83.xmi   |  22 --
 wikinews-importer/samples/wikinews.xml             | 107 ---------
 .../wikinews_importer/AnnotatingMarkupParser.java  | 262 ---------------------
 .../opennlp/wikinews_importer/Annotation.java      |  37 ---
 .../apache/opennlp/wikinews_importer/UimaUtil.java | 131 -----------
 .../wikinews_importer/WikinewsConverter.java       | 196 ---------------
 .../wikinews_importer/WikinewsWikiModel.java       |  48 ----
 15 files changed, 1162 deletions(-)

diff --git a/pom.xml b/pom.xml
index 90f6c33..d784dcc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -111,7 +111,6 @@
         <module>tf-ner-poc</module>
         <module>summarizer</module>
         <module>tagging-server</module>
-        <module>wikinews-importer</module>
     </modules>
 
     <properties>
diff --git a/wikinews-importer/bin/converter b/wikinews-importer/bin/converter
deleted file mode 100755
index 3b0285c..0000000
--- a/wikinews-importer/bin/converter
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-
-#   Licensed to the Apache Software Foundation (ASF) under one
-#   or more contributor license agreements.  See the NOTICE file
-#   distributed with this work for additional information
-#   regarding copyright ownership.  The ASF licenses this file
-#   to you under the Apache License, Version 2.0 (the
-#   "License"); you may not use this file except in compliance
-#   with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing,
-#   software distributed under the License is distributed on an
-#   
-#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#   KIND, either express or implied.  See the License for the
-#   specific language governing permissions and limitations
-#   under the License.
-
-mvn -e -q exec:java "-Dexec.mainClass=org.apache.opennlp.wikinews_importer.WikinewsConverter" "-Dexec.args=$*"
diff --git a/wikinews-importer/pom.xml b/wikinews-importer/pom.xml
deleted file mode 100644
index fff20d1..0000000
--- a/wikinews-importer/pom.xml
+++ /dev/null
@@ -1,79 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-       <modelVersion>4.0.0</modelVersion>
-       <parent>
-               <groupId>org.apache.opennlp</groupId>
-               <artifactId>opennlp-sandbox</artifactId>
-               <version>3.0.0-SNAPSHOT</version>
-       </parent>
-
-       <artifactId>wikinews-importer</artifactId>
-       <packaging>jar</packaging>
-
-       <name>Apache OpenNLP Wikinews Importer</name>
-
-       <dependencies>
-
-           <dependency> 
-             <groupId>info.bliki.wiki</groupId> 
-             <artifactId>bliki-core</artifactId> 
-             <version>3.0.19</version>
-           </dependency>
-    
-       <dependency>
-                       <groupId>org.apache.uima</groupId>
-                       <artifactId>uimaj-core</artifactId>
-                       <version>${uimaj.version}</version>
-                       <scope>compile</scope>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.junit.jupiter</groupId>
-                       <artifactId>junit-jupiter-api</artifactId>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.junit.jupiter</groupId>
-                       <artifactId>junit-jupiter-engine</artifactId>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.junit.jupiter</groupId>
-                       <artifactId>junit-jupiter-params</artifactId>
-               </dependency>
-       </dependencies>
-       
-       <build>
-               <plugins>
-                       <plugin>
-                               <groupId>org.apache.maven.plugins</groupId>
-                               <artifactId>maven-compiler-plugin</artifactId>
-                               <configuration>
-                                       <source>${maven.compiler.source}</source>
-                                       <target>${maven.compiler.target}</target>
-                                       <compilerArgument>-Xlint</compilerArgument>
-                               </configuration>
-                       </plugin>
-               </plugins>
-       </build>
-</project>
\ No newline at end of file
diff --git a/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi b/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi
deleted file mode 100644
index 19340d5..0000000
--- a/wikinews-importer/samples/FTC_begins_antitrust_inquiry_of_Google.xmi
+++ /dev/null
@@ -1,77 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
-    <cas:NULL xmi:id="0"/>
-    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" 
sofaString="FTC begins antitrust inquiry of Google&#10;&#10;Google has 
confirmed that it has &quot;received formal notification,&quot; that 
the&#10;Federal Trade Commission (FTC) is investigating its business 
practices.&#10;The acknowledgment was posted on the internet search engine 
company's blog&#10;Friday. Google said it was unclear about the nature of the 
probe.&#10;&#10;A broad FTC investigation would cause t [...]
-    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="3039" language="x-unspecified"/>
-    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="38"/>
-    <annotations:Sentence xmi:id="17" sofa="1" begin="40" end="185"/>
-    <annotations:Sentence xmi:id="21" sofa="1" begin="186" end="326"/>
-    <annotations:Sentence xmi:id="25" sofa="1" begin="328" end="529"/>
-    <annotations:Sentence xmi:id="29" sofa="1" begin="531" end="667"/>
-    <annotations:Sentence xmi:id="33" sofa="1" begin="861" end="1095"/>
-    <annotations:Sentence xmi:id="37" sofa="1" begin="669" end="860"/>
-    <annotations:Sentence xmi:id="41" sofa="1" begin="2851" end="3039"/>
-    <annotations:Sentence xmi:id="45" sofa="1" begin="2673" end="2849"/>
-    <annotations:Sentence xmi:id="49" sofa="1" begin="2231" end="2443"/>
-    <annotations:Sentence xmi:id="53" sofa="1" begin="2444" end="2671"/>
-    <annotations:Sentence xmi:id="57" sofa="1" begin="2148" end="2229"/>
-    <annotations:Sentence xmi:id="61" sofa="1" begin="1888" end="2147"/>
-    <annotations:Sentence xmi:id="65" sofa="1" begin="1568" end="1722"/>
-    <annotations:Sentence xmi:id="69" sofa="1" begin="1723" end="1886"/>
-    <annotations:Sentence xmi:id="73" sofa="1" begin="1097" end="1275"/>
-    <annotations:Sentence xmi:id="77" sofa="1" begin="1276" end="1407"/>
-    <annotations:Sentence xmi:id="81" sofa="1" begin="1408" end="1566"/>
-    <annotations:Organization xmi:id="85" sofa="1" begin="0" end="3"/>
-    <annotations:Organization xmi:id="89" sofa="1" begin="32" end="38"/>
-    <annotations:Organization xmi:id="93" sofa="1" begin="40" end="46"/>
-    <annotations:Organization xmi:id="97" sofa="1" begin="114" end="144"/>
-    <annotations:Organization xmi:id="101" sofa="1" begin="269" end="275"/>
-    <annotations:Organization xmi:id="105" sofa="1" begin="336" end="339"/>
-    <annotations:Organization xmi:id="109" sofa="1" begin="437" end="446"/>
-    <annotations:Organization xmi:id="113" sofa="1" begin="513" end="522"/>
-    <annotations:Organization xmi:id="117" sofa="1" begin="535" end="538"/>
-    <annotations:Organization xmi:id="121" sofa="1" begin="663" end="666"/>
-    <annotations:Organization xmi:id="125" sofa="1" begin="669" end="681"/>
-    <annotations:Organization xmi:id="129" sofa="1" begin="715" end="718"/>
-    <annotations:Organization xmi:id="133" sofa="1" begin="757" end="763"/>
-    <annotations:Organization xmi:id="137" sofa="1" begin="861" end="867"/>
-    <annotations:Organization xmi:id="141" sofa="1" begin="881" end="887"/>
-    <annotations:Organization xmi:id="145" sofa="1" begin="1101" end="1104"/>
-    <annotations:Organization xmi:id="149" sofa="1" begin="1139" end="1145"/>
-    <annotations:Organization xmi:id="153" sofa="1" begin="1289" end="1292"/>
-    <annotations:Organization xmi:id="157" sofa="1" begin="1295" end="1301"/>
-    <annotations:Organization xmi:id="161" sofa="1" begin="1398" end="1406"/>
-    <annotations:Organization xmi:id="165" sofa="1" begin="1559" end="1564"/>
-    <annotations:Organization xmi:id="169" sofa="1" begin="1568" end="1574"/>
-    <annotations:Organization xmi:id="173" sofa="1" begin="1699" end="1705"/>
-    <annotations:Organization xmi:id="177" sofa="1" begin="1764" end="1787"/>
-    <annotations:Organization xmi:id="181" sofa="1" begin="1797" end="1800"/>
-    <annotations:Organization xmi:id="185" sofa="1" begin="1865" end="1871"/>
-    <annotations:Organization xmi:id="189" sofa="1" begin="1971" end="1974"/>
-    <annotations:Organization xmi:id="193" sofa="1" begin="2056" end="2062"/>
-    <annotations:Organization xmi:id="197" sofa="1" begin="2231" end="2237"/>
-    <annotations:Organization xmi:id="201" sofa="1" begin="2292" end="2295"/>
-    <annotations:Organization xmi:id="205" sofa="1" begin="2418" end="2424"/>
-    <annotations:Organization xmi:id="209" sofa="1" begin="2507" end="2510"/>
-    <annotations:Organization xmi:id="213" sofa="1" begin="2673" end="2682"/>
-    <annotations:Organization xmi:id="217" sofa="1" begin="2701" end="2707"/>
-    <annotations:Organization xmi:id="221" sofa="1" begin="2851" end="2857"/>
-    <annotations:Organization xmi:id="225" sofa="1" begin="3035" end="3038"/>
-    <annotations:Organization xmi:id="229" sofa="1" begin="2963" end="2969"/>
-    <annotations:Person xmi:id="233" sofa="1" begin="1732" end="1745"/>
-    <annotations:Person xmi:id="237" sofa="1" begin="1888" end="1902"/>
-    <annotations:Person xmi:id="241" sofa="1" begin="2245" end="2257"/>
-    <annotations:Person xmi:id="245" sofa="1" begin="2448" end="2455"/>
-    <annotations:Paragraph xmi:id="249" sofa="1" begin="40" end="326"/>
-    <annotations:Paragraph xmi:id="253" sofa="1" begin="328" end="529"/>
-    <annotations:Paragraph xmi:id="257" sofa="1" begin="531" end="667"/>
-    <annotations:Paragraph xmi:id="261" sofa="1" begin="669" end="1095"/>
-    <annotations:Paragraph xmi:id="265" sofa="1" begin="1097" end="1566"/>
-    <annotations:Paragraph xmi:id="269" sofa="1" begin="1568" end="1886"/>
-    <annotations:Paragraph xmi:id="273" sofa="1" begin="1888" end="2229"/>
-    <annotations:Paragraph xmi:id="277" sofa="1" begin="2231" end="2671"/>
-    <annotations:Paragraph xmi:id="281" sofa="1" begin="2673" end="2849"/>
-    <annotations:Paragraph xmi:id="285" sofa="1" begin="2851" end="3039"/>
-    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 73 77 81 85 89 93 97 101 105 109 113 117 121 125 129 133 137 141 145 149 153 157 161 165 169 173 177 181 185 189 193 197 201 205 209 213 217 221 225 229 233 237 241 245 249 253 257 261 265 269 273 277 281 285"/>
-</xmi:XMI>
diff --git a/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi b/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi
deleted file mode 100644
index 8ebfa3d..0000000
--- a/wikinews-importer/samples/Internet_hacking_group_LulzSec_disbands.xmi
+++ /dev/null
@@ -1,23 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
-    <cas:NULL xmi:id="0"/>
-    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" 
sofaString="Internet hacking group LulzSec disbands&#10;&#10;The computer 
hacking organization Lulz Security disbanded yesterday, said the&#10;group in a 
statement. Released via Pastebin, it states &quot;[o]ur planned 50 
day&#10;cruise has expired, and we must now sail into the 
distance.&quot;&#10;&#10;The announcement comes a day after The Guardian 
released leaked IRC logs of&#10;private conversations between Lul [...]
-    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="1286" language="x-unspecified"/>
-    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="39"/>
-    <annotations:Paragraph xmi:id="17" sofa="1" begin="41" end="256"/>
-    <annotations:Paragraph xmi:id="21" sofa="1" begin="258" end="455"/>
-    <annotations:Paragraph xmi:id="25" sofa="1" begin="457" end="748"/>
-    <annotations:Paragraph xmi:id="29" sofa="1" begin="750" end="1167"/>
-    <annotations:Paragraph xmi:id="33" sofa="1" begin="1169" end="1285"/>
-    <annotations:Sentence xmi:id="37" sofa="1" begin="141" end="256"/>
-    <annotations:Sentence xmi:id="41" sofa="1" begin="41" end="140"/>
-    <annotations:Sentence xmi:id="45" sofa="1" begin="258" end="455"/>
-    <annotations:Sentence xmi:id="49" sofa="1" begin="666" end="748"/>
-    <annotations:Sentence xmi:id="53" sofa="1" begin="537" end="665"/>
-    <annotations:Sentence xmi:id="57" sofa="1" begin="457" end="536"/>
-    <annotations:Sentence xmi:id="61" sofa="1" begin="750" end="856"/>
-    <annotations:Sentence xmi:id="65" sofa="1" begin="857" end="959"/>
-    <annotations:Sentence xmi:id="69" sofa="1" begin="960" end="1167"/>
-    <annotations:Sentence xmi:id="73" sofa="1" begin="1169" end="1285"/>
-    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 73"/>
-</xmi:XMI>
diff --git a/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi b/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi
deleted file mode 100644
index ad4f430..0000000
--- a/wikinews-importer/samples/Obama_announces_troop_reductions_in_Afghanistan.xmi
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
-    <cas:NULL xmi:id="0"/>
-    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" 
sofaString="Obama announces troop reductions in Afghanistan&#10;&#10;A third of 
U.S. forces in Afghanistan are to be withdrawn&#10;from the country by the end 
of next year, president Barack Obama&#10;has announced. In a televised 
statement on Wednesday evening,&#10;Obama announced 33,000 soldiers would be 
withdrawn from the country&#10;by the summer of next year, and declared the 
U.S. had beaten&#10;al-Qaeda and t [...]
-    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="1807" language="x-unspecified"/>
-    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="47"/>
-    <annotations:Paragraph xmi:id="17" sofa="1" begin="49" end="388"/>
-    <annotations:Paragraph xmi:id="21" sofa="1" begin="390" end="807"/>
-    <annotations:Paragraph xmi:id="25" sofa="1" begin="809" end="1259"/>
-    <annotations:Paragraph xmi:id="29" sofa="1" begin="1261" end="1484"/>
-    <annotations:Paragraph xmi:id="33" sofa="1" begin="1486" end="1646"/>
-    <annotations:Paragraph xmi:id="37" sofa="1" begin="1648" end="1806"/>
-    <annotations:Sentence xmi:id="41" sofa="1" begin="49" end="186"/>
-    <annotations:Sentence xmi:id="45" sofa="1" begin="187" end="388"/>
-    <annotations:Sentence xmi:id="49" sofa="1" begin="390" end="626"/>
-    <annotations:Sentence xmi:id="53" sofa="1" begin="627" end="807"/>
-    <annotations:Sentence xmi:id="57" sofa="1" begin="809" end="1058"/>
-    <annotations:Sentence xmi:id="61" sofa="1" begin="1059" end="1259"/>
-    <annotations:Sentence xmi:id="65" sofa="1" begin="1261" end="1484"/>
-    <annotations:Sentence xmi:id="69" sofa="1" begin="1587" end="1646"/>
-    <annotations:Sentence xmi:id="73" sofa="1" begin="1486" end="1586"/>
-    <annotations:Sentence xmi:id="77" sofa="1" begin="1648" end="1806"/>
-    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 73 77"/>
-</xmi:XMI>
diff --git "a/wikinews-importer/samples/Ratko_Mladi\304\207_said_to_be_too_ill_to_face_trial.xmi" "b/wikinews-importer/samples/Ratko_Mladi\304\207_said_to_be_too_ill_to_face_trial.xmi"
deleted file mode 100644
index cfa560b..0000000
--- "a/wikinews-importer/samples/Ratko_Mladi\304\207_said_to_be_too_ill_to_face_trial.xmi"
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
-    <cas:NULL xmi:id="0"/>
-    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" 
sofaString="Ratko Mladić said to be too ill to face trial&#10;&#10;Ratko 
Mladić, a former Bosnian Serb general, is allegedly too ill to&#10;face trial 
for war crimes. According to his lawyer, 69-year-old Mladić&#10;will not 
survive to see the start of proceedings.&#10;&#10;Concerns for the health of 
Mladić come despite a Serbian judge having&#10;ruled him fit for extradition to 
the UN war crimes tribunal in The Ha [...]
-    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="1618" language="x-unspecified"/>
-    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="45"/>
-    <annotations:Paragraph xmi:id="17" sofa="1" begin="47" end="236"/>
-    <annotations:Paragraph xmi:id="21" sofa="1" begin="238" end="441"/>
-    <annotations:Paragraph xmi:id="25" sofa="1" begin="443" end="593"/>
-    <annotations:Paragraph xmi:id="29" sofa="1" begin="595" end="722"/>
-    <annotations:Paragraph xmi:id="33" sofa="1" begin="724" end="906"/>
-    <annotations:Paragraph xmi:id="37" sofa="1" begin="908" end="1173"/>
-    <annotations:Paragraph xmi:id="45" sofa="1" begin="1483" end="1617"/>
-    <annotations:Paragraph xmi:id="101" sofa="1" begin="1175" end="1481"/>
-    <annotations:Sentence xmi:id="49" sofa="1" begin="143" end="236"/>
-    <annotations:Sentence xmi:id="53" sofa="1" begin="47" end="142"/>
-    <annotations:Sentence xmi:id="57" sofa="1" begin="238" end="441"/>
-    <annotations:Sentence xmi:id="61" sofa="1" begin="443" end="593"/>
-    <annotations:Sentence xmi:id="65" sofa="1" begin="595" end="722"/>
-    <annotations:Sentence xmi:id="69" sofa="1" begin="850" end="906"/>
-    <annotations:Sentence xmi:id="73" sofa="1" begin="724" end="849"/>
-    <annotations:Sentence xmi:id="77" sofa="1" begin="908" end="1173"/>
-    <annotations:Sentence xmi:id="81" sofa="1" begin="1395" end="1481"/>
-    <annotations:Sentence xmi:id="85" sofa="1" begin="1277" end="1394"/>
-    <annotations:Sentence xmi:id="89" sofa="1" begin="1175" end="1276"/>
-    <annotations:Sentence xmi:id="93" sofa="1" begin="1483" end="1617"/>
-    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 45 101 49 53 57 61 65 69 73 77 81 85 89 93"/>
-</xmi:XMI>
diff --git a/wikinews-importer/samples/TypeSystem.xml b/wikinews-importer/samples/TypeSystem.xml
deleted file mode 100644
index 75180a4..0000000
--- a/wikinews-importer/samples/TypeSystem.xml
+++ /dev/null
@@ -1,106 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-
-       <!--
-        ***************************************************************
-        * Licensed to the Apache Software Foundation (ASF) under one
-        * or more contributor license agreements.  See the NOTICE file
-        * distributed with this work for additional information
-        * regarding copyright ownership.  The ASF licenses this file
-        * to you under the Apache License, Version 2.0 (the
-        * "License"); you may not use this file except in compliance
-        * with the License.  You may obtain a copy of the License at
-         *
-        *   http://www.apache.org/licenses/LICENSE-2.0
-        * 
-        * Unless required by applicable law or agreed to in writing,
-        * software distributed under the License is distributed on an
-        * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-        * KIND, either express or implied.  See the License for the
-        * specific language governing permissions and limitations
-        * under the License.
-        ***************************************************************
-   -->
-   
-<typeSystemDescription  xmlns="http://uima.apache.org/resourceSpecifier">
-       <name>WikinewsTypeSystem</name>
-       <description>Wikinews Sample Type System Definition</description>
-       <vendor>The Apache Software Foundation</vendor>
-       <version>1.0</version>
-       <types>
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.Headline</name>
-                       <description></description>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-               </typeDescription>
-
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.SubHeadline</name>
-                       <description></description>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-               </typeDescription>
-
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.Paragraph</name>
-                       <description></description>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-               </typeDescription>
-
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.Sentence</name>
-                       <description></description>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-               </typeDescription>
-
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.Token</name>
-                       <description></description>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-               </typeDescription>
-
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.Person</name>
-                       <description></description>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-               </typeDescription>
-
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.Organization</name>
-                       <description></description>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-               </typeDescription>
-
-               <typeDescription>
-                       <name>org.apache.opennlp.annotations.WikiLink</name>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-                       <features>
-                               <featureDescription>
-                                       <name>link</name>
-                                       <description></description>
-                                       <rangeTypeName>uima.cas.String</rangeTypeName>
-                               </featureDescription>
-                       </features>
-               </typeDescription>
-               <typeDescription>
-                       <name>bumblebee.annotations.AnnotationStatus</name>
-                       <supertypeName>uima.tcas.Annotation</supertypeName>
-                       <features>
-                               <featureDescription>
-                                       <name>sentence</name>
-                                       <rangeTypeName>uima.cas.Boolean</rangeTypeName>
-                               </featureDescription>
-                               <featureDescription>
-                                       <name>token</name>
-                                       <rangeTypeName>uima.cas.Boolean</rangeTypeName>
-                               </featureDescription>
-                               <featureDescription>
-                                       <name>person</name>
-                                       <rangeTypeName>uima.cas.Boolean</rangeTypeName>
-                               </featureDescription>
-                               <featureDescription>
-                                       <name>organization</name>
-                                       <rangeTypeName>uima.cas.Boolean</rangeTypeName>
-                               </featureDescription>
-                       </features>
-               </typeDescription>              
-       </types>
-</typeSystemDescription>
\ No newline at end of file
diff --git a/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi b/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi
deleted file mode 100644
index 4177b48..0000000
--- a/wikinews-importer/samples/US_actor_Peter_Falk_dies_aged_83.xmi
+++ /dev/null
@@ -1,22 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:annotations="http:///org/apache/opennlp/annotations.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmi:version="2.0">
-    <cas:NULL xmi:id="0"/>
-    <cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" 
sofaString="US actor Peter Falk dies aged 83&#10;&#10;US actor Peter Falk died 
on Thursday at his home in Beverly Hills, California&#10;after a struggle with 
Alzheimer's disease. Falk may be most known for his&#10;role as Detective 
Columbo in the television series of the same name that ran&#10;on NBC from 1968 
to 1978, which moved to ABC in 1989. The last episode aired&#10;in 2003. His 
portrayal of the character w [...]
-    <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="902" language="x-unspecified"/>
-    <annotations:Headline xmi:id="13" sofa="1" begin="0" end="32"/>
-    <annotations:Paragraph xmi:id="17" sofa="1" begin="34" end="405"/>
-    <annotations:Paragraph xmi:id="21" sofa="1" begin="407" end="636"/>
-    <annotations:Paragraph xmi:id="25" sofa="1" begin="638" end="852"/>
-    <annotations:Paragraph xmi:id="29" sofa="1" begin="854" end="901"/>
-    <annotations:Sentence xmi:id="33" sofa="1" begin="554" end="636"/>
-    <annotations:Sentence xmi:id="37" sofa="1" begin="34" end="154"/>
-    <annotations:Sentence xmi:id="41" sofa="1" begin="155" end="316"/>
-    <annotations:Sentence xmi:id="45" sofa="1" begin="317" end="348"/>
-    <annotations:Sentence xmi:id="49" sofa="1" begin="349" end="405"/>
-    <annotations:Sentence xmi:id="53" sofa="1" begin="407" end="511"/>
-    <annotations:Sentence xmi:id="57" sofa="1" begin="512" end="553"/>
-    <annotations:Sentence xmi:id="61" sofa="1" begin="638" end="746"/>
-    <annotations:Sentence xmi:id="65" sofa="1" begin="747" end="852"/>
-    <annotations:Sentence xmi:id="69" sofa="1" begin="854" end="901"/>
-    <cas:View sofa="1" members="8 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69"/>
-</xmi:XMI>
diff --git a/wikinews-importer/samples/wikinews.xml b/wikinews-importer/samples/wikinews.xml
deleted file mode 100644
index eb98409..0000000
--- a/wikinews-importer/samples/wikinews.xml
+++ /dev/null
@@ -1,107 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.
--->
-
-<fields xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-       xsi:noNamespaceSchemaLocation="../../main/resources/lucas.xsd">
-
-       <field name="headline" index="yes" stored="no">
-               <annotations>
-                       <annotation type="org.apache.opennlp.annotations.Headline" tokenizer="standard">
-                               <filters>
-                                       <filter name="lowercase"/>
-                               </filters>
-                       </annotation>
-               </annotations>
-       </field>
-       
-       <field name="text" index="yes" stored="no">
-               <annotations>
-                       <annotation type="uima.tcas.DocumentAnnotation" tokenizer="standard">
-                               <filters>
-                                       <filter name="lowercase"/>
-                               </filters>
-                       </annotation>
-                       
-               </annotations>
-       </field>
-       
-       <field name="person" index="yes" stored="no">
-               <annotations>
-                       <annotation type="org.apache.opennlp.annotations.Person" tokenizer="standard">
-                               <filters>
-                                       <filter name="lowercase"/>
-                               </filters>
-                       </annotation>
-                       
-               </annotations>
-       </field>
-       
-       <field name="organization" index="yes" stored="no">
-               <annotations>
-                       <annotation type="org.apache.opennlp.annotations.Organization" tokenizer="standard">
-                               <filters>
-                                       <filter name="lowercase"/>
-                               </filters>
-                       </annotation>
-                       
-               </annotations>
-       </field>
-       
-       <field name="status-sentence" index="yes" stored="no" termVector="no">
-               <annotations>
-                       <annotation type="bumblebee.annotations.AnnotationStatus" tokenizer="standard">
-                               <features>
-                                       <feature name="sentence"/>
-                               </features>
-                       </annotation>
-               </annotations>
-       </field>
-       
-       <field name="status-token" index="yes" stored="no" termVector="no">
-               <annotations>
-                       <annotation type="bumblebee.annotations.AnnotationStatus">
-                               <features>
-                                       <feature name="token"/>
-                               </features>
-                       </annotation>
-               </annotations>
-       </field>
-       
-       <field name="status-person" index="yes" stored="yes">
-               <annotations>
-                       <annotation type="bumblebee.annotations.AnnotationStatus">
-                               <features>
-                                       <feature name="token"/>
-                               </features>
-                       </annotation>
-               </annotations>
-       </field>
-       
-       <field name="status-organization" index="yes">
-               <annotations>
-                       <annotation type="bumblebee.annotations.AnnotationStatus">
-                               <features>
-                                       <feature name="token"/>
-                               </features>
-                       </annotation>
-               </annotations>
-       </field>
-</fields>
diff --git a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/AnnotatingMarkupParser.java b/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/AnnotatingMarkupParser.java
deleted file mode 100644
index 2624ae7..0000000
--- a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/AnnotatingMarkupParser.java
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.opennlp.wikinews_importer;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import info.bliki.htmlcleaner.ContentToken;
-import info.bliki.htmlcleaner.TagNode;
-import info.bliki.wiki.filter.ITextConverter;
-import info.bliki.wiki.filter.WPList;
-import info.bliki.wiki.filter.WPTable;
-import info.bliki.wiki.model.Configuration;
-import info.bliki.wiki.model.IWikiModel;
-import info.bliki.wiki.model.ImageFormat;
-import info.bliki.wiki.model.WikiModel;
-import info.bliki.wiki.tags.WPATag;
-
-/**
- * Parse mediawiki markup to strip the formatting info and extract a simple 
text
- * version suitable for NLP along with header, paragraph and link position
- * annotations.
- * <p>
- * Use the {@code #convert(String)} and {@code #getWikiLinks()} methods.
- * <p>
- * Due to the constraints imposed by the {@link ITextConverter} /
- * {@link WikiModel} API, this class is not thread safe: only one instance
- * should be run by thread.
- */
-public class AnnotatingMarkupParser implements ITextConverter {
-
-    private static final String HREF_ATTR_KEY = "href";
-
-    private static final String WIKILINK_TITLE_ATTR_KEY = "title";
-    private static final String WIKILINK_TARGET_ATTR_KEY = "href";
-    private static final String WIKIOBJECT_ATTR_KEY = "wikiobject";
-
-    private static final Set<String> PARAGRAPH_TAGS = Set.of("p");
-    private static final Set<String> HEADING_TAGS = Set.of("h1", "h2", "h3", 
"h4", "h5", "h6");
-
-    private final List<Annotation> wikilinks = new ArrayList<>();
-    private final List<Annotation> headers = new ArrayList<>();
-    private final List<Annotation> paragraphs = new ArrayList<>();
-
-    private String languageCode = "en";
-
-    private final WikiModel model;
-
-    private String redirect;
-
-    private String text;
-
-    public AnnotatingMarkupParser() {
-        model = makeWikiModel(languageCode);
-    }
-
-    public AnnotatingMarkupParser(String languageCode) {
-        this.languageCode = languageCode;
-        model = makeWikiModel(languageCode);
-    }
-
-    public WikiModel makeWikiModel(String langCode) {
-        return new 
WikiModel(String.format("https:/%s.wikipedia.org/wiki/${image}", langCode),
-                String.format("https://%s.wikipedia.org/wiki/${title}", 
langCode)) {
-            @Override
-            public String getRawWikiContent(String namespace,
-                    String articleName, Map<String, String> 
templateParameters) {
-                // disable template support
-                // TODO: we need to re-add template support at least for dates
-                return "";
-            }
-        };
-    }
-
-    @Override
-    public void nodesToText(List<?> nodes, Appendable buffer, IWikiModel 
model) throws IOException {
-        CountingAppendable countingBuffer;
-        if (buffer instanceof CountingAppendable) {
-            countingBuffer = (CountingAppendable) buffer;
-        } else {
-            // wrap
-            countingBuffer = new CountingAppendable(buffer);
-        }
-
-        if (nodes != null && !nodes.isEmpty()) {
-            try {
-                int level = model.incrementRecursionLevel();
-                if (level > Configuration.RENDERER_RECURSION_LIMIT) {
-                    countingBuffer.append("Error - recursion limit exceeded"
-                            + " rendering tags in 
PlainTextConverter#nodesToText().");
-                    return;
-                }
-                for (Object node : nodes) {
-                    if (node instanceof WPATag tag) {
-                        // extract wikilink annotations
-                        String wikilinkLabel = 
tag.getAttributes().get(WIKILINK_TITLE_ATTR_KEY);
-                        String wikilinkTarget = 
tag.getAttributes().get(WIKILINK_TARGET_ATTR_KEY);
-                        if (wikilinkLabel != null) {
-                            int colonIdx = -1; // wikilinkLabel.indexOf(':');
-                            if (colonIdx == -1) {
-                                // do not serialize non-topic wiki-links such 
as
-                                // translation links missing from the
-                                // INTERWIKI_LINK map
-                                int start = countingBuffer.currentPosition;
-                                tag.getBodyString(countingBuffer);
-                                int end = countingBuffer.currentPosition;
-                                if (!wikilinkTarget.startsWith("#")) {
-                                  // TODO: wikilink label is not 
important,since that is the covered text?
-                                    wikilinks.add(new Annotation(start, end, 
wikilinkLabel, wikilinkTarget));
-                                }
-                            }
-                        } else {
-                            tag.getBodyString(countingBuffer);
-                        }
-
-                    } else if (node instanceof ContentToken contentToken) {
-                        countingBuffer.append(contentToken.getContent());
-                    } else if (node instanceof List) {
-                    } else if (node instanceof WPList) {
-                    } else if (node instanceof WPTable) {
-                        // ignore lists and tables since they most of the time
-                        // do not hold grammatically correct
-                        // interesting sentences that are representative of the
-                        // language.
-                    } else if (node instanceof TagNode tagNode) {
-                        Map<String, String> attributes = 
tagNode.getAttributes();
-                        Map<String, Object> oAttributes = 
tagNode.getObjectAttributes();
-                        boolean hasSpecialHandling = false;
-                        String tagName = tagNode.getName();
-                        int tagBegin = countingBuffer.currentPosition;
-                        
-                        if ("ref".equals(tagName)) {
-                            // ignore the references since they do not hold
-                            // interesting text content
-                            hasSpecialHandling = true;
-                        } else if (oAttributes != null
-                                && oAttributes.get(WIKIOBJECT_ATTR_KEY) 
instanceof ImageFormat) {
-                            // the caption of images often holds well-formed
-                            // sentences with links to entities
-                            hasSpecialHandling = true;
-                            ImageFormat iformat = (ImageFormat) 
oAttributes.get(WIKIOBJECT_ATTR_KEY);
-                            imageNodeToText(tagNode, iformat, countingBuffer, 
model);
-                        }
-                        if (!hasSpecialHandling) {
-                            nodesToText(tagNode.getChildren(), countingBuffer, 
model);
-                        }
-                        if (PARAGRAPH_TAGS.contains(tagName)) {
-                            paragraphs.add(new Annotation(tagBegin,
-                                    countingBuffer.currentPosition, 
"paragraph", tagName));
-                            countingBuffer.append("\n\n");
-                        } else if (HEADING_TAGS.contains(tagName)) {
-                            headers.add(new Annotation(tagBegin,
-                                countingBuffer.currentPosition, "heading", 
tagName));
-                            countingBuffer.append("\n\n");
-                        } else if ("a".equals(tagName)) {
-                          String href = attributes.get(HREF_ATTR_KEY);
-                          
-                          // TODO: How to get covered text here? Is not needed 
anyway right?!
-                          wikilinks.add(new Annotation(tagBegin, 
countingBuffer.currentPosition,
-                              "", href));
-                        }
-                          
-                    }
-                }
-            } finally {
-                model.decrementRecursionLevel();
-            }
-        }
-    }
-
-    @Override
-    public void imageNodeToText(TagNode tagNode, ImageFormat imageFormat,
-            Appendable buffer, IWikiModel model) throws IOException {
-//        nodesToText(tagNode.getChildren(), buffer, model);
-    }
-
-    @Override
-    public boolean noLinks() {
-        return true;
-    }
-
-    public List<Annotation> getWikiLinkAnnotations() {
-        return wikilinks;
-    }
-
-    public List<Annotation> getHeaderAnnotations() {
-        return headers;
-    }
-
-    public List<Annotation> getParagraphAnnotations() {
-        return paragraphs;
-    }
-
-    public List<String> getParagraphs() {
-        List<String> texts = new ArrayList<>();
-        for (Annotation p : paragraphs) {
-            texts.add(text.substring(p.begin, p.end));
-        }
-        return texts;
-    }
-
-    public List<String> getHeaders() {
-        List<String> texts = new ArrayList<>();
-        for (Annotation h : headers) {
-            texts.add(text.substring(h.begin, h.end));
-        }
-        return texts;
-    }
-
-    public String getRedirect() {
-        return redirect;
-    }
-
-    public static class CountingAppendable implements Appendable {
-
-        public int currentPosition = 0;
-
-        final protected Appendable wrappedBuffer;
-
-        public CountingAppendable(Appendable wrappedBuffer) {
-            this.wrappedBuffer = wrappedBuffer;
-        }
-
-        @Override
-        public Appendable append(CharSequence charSeq) throws IOException {
-            currentPosition += charSeq.length();
-            return wrappedBuffer.append(charSeq);
-        }
-
-        @Override
-        public Appendable append(char aChar) throws IOException {
-            currentPosition += 1;
-            return wrappedBuffer.append(aChar);
-        }
-
-        @Override
-        public Appendable append(CharSequence charSeq, int start, int end) 
throws IOException {
-            currentPosition += end - start;
-            return wrappedBuffer.append(charSeq, start, end);
-        }
-
-    }
-
-}
diff --git a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/Annotation.java b/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/Annotation.java
deleted file mode 100644
index 7be95fa..0000000
--- a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/Annotation.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.opennlp.wikinews_importer;
-
-public class Annotation {
-
-    public final int begin;
-
-    public final int end;
-
-    public final String label;
-    
-    public final String value;
-
-    public Annotation(int start, int end, String label, String value) {
-        this.begin = start;
-        this.end = end;
-        this.label = label;
-        this.value = value;
-    }
-
-}
diff --git a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/UimaUtil.java b/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/UimaUtil.java
deleted file mode 100644
index 3039e10..0000000
--- a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/UimaUtil.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.opennlp.wikinews_importer;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-import org.apache.uima.ResourceSpecifierFactory;
-import org.apache.uima.UIMAFramework;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.impl.XmiCasDeserializer;
-import org.apache.uima.cas.impl.XmiCasSerializer;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.metadata.FsIndexDescription;
-import org.apache.uima.resource.metadata.TypePriorities;
-import org.apache.uima.resource.metadata.TypeSystemDescription;
-import org.apache.uima.resource.metadata.impl.FsIndexDescription_impl;
-import org.apache.uima.util.CasCreationUtils;
-import org.apache.uima.util.InvalidXMLException;
-import org.apache.uima.util.XMLInputSource;
-import org.apache.uima.util.XMLParser;
-import org.apache.uima.util.XMLSerializer;
-import org.xml.sax.SAXException;
-
-public class UimaUtil {
-
-  static TypeSystemDescription createTypeSystemDescription(InputStream in) {
-
-    // Note:
-    // Type System location is not set correctly,
-    // resolving a referenced type system will fail
-
-    XMLInputSource xmlTypeSystemSource = new XMLInputSource(in, new File(""));
-
-    XMLParser xmlParser = UIMAFramework.getXMLParser();
-
-    TypeSystemDescription typeSystemDescriptor;
-
-    try {
-      typeSystemDescriptor = (TypeSystemDescription) 
xmlParser.parse(xmlTypeSystemSource);
-
-      typeSystemDescriptor.resolveImports();
-    } catch (InvalidXMLException e) {
-      e.printStackTrace();
-      typeSystemDescriptor = null;
-    }
-
-    return typeSystemDescriptor;
-  }
-
-  static CAS createEmptyCAS(TypeSystemDescription typeSystem) {
-    ResourceSpecifierFactory resourceSpecifierFactory = UIMAFramework
-        .getResourceSpecifierFactory();
-    TypePriorities typePriorities = resourceSpecifierFactory
-        .createTypePriorities();
-
-    FsIndexDescription indexDesciptor = new FsIndexDescription_impl();
-    indexDesciptor.setLabel("TOPIndex");
-    indexDesciptor.setTypeName("uima.cas.TOP");
-    indexDesciptor.setKind(FsIndexDescription.KIND_SORTED);
-
-    CAS cas;
-    try {
-      cas = CasCreationUtils.createCas(typeSystem, typePriorities,
-          new FsIndexDescription[] { indexDesciptor });
-    } catch (ResourceInitializationException e) {
-      e.printStackTrace();
-      cas = null;
-    }
-
-    return cas;
-  }
-
-  static void deserializeXmiCAS(CAS cas, InputStream xmiIn) throws IOException 
{
-
-    SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
-    saxParserFactory.setValidating(false);
-
-    SAXParser saxParser;
-
-    try {
-      saxParser = saxParserFactory.newSAXParser();
-    } catch (ParserConfigurationException e) {
-      throw new IllegalStateException(
-          "SAXParser should be configured correctly!", e);
-    } catch (SAXException e) {
-      throw new IllegalStateException("SAX error while creating parser!", e);
-    }
-
-    XmiCasDeserializer deserializer = new 
XmiCasDeserializer(cas.getTypeSystem());
-
-    try {
-      saxParser.parse(xmiIn, deserializer.getXmiCasHandler(cas));
-    } catch (SAXException e) {
-      throw new IOException("Invalid XMI input!", e);
-    }
-  }
-  
-  static void serializeCASToXmi(CAS cas, OutputStream out) throws IOException {
-    XmiCasSerializer xmiSerializer = new XmiCasSerializer(cas.getTypeSystem());
-
-    XMLSerializer xmlSerialzer = new XMLSerializer(out, true);  
-    
-    try {
-      xmiSerializer.serialize(cas, xmlSerialzer.getContentHandler());
-    } catch (SAXException e) {
-      e.printStackTrace();
-    }
-  }
-}
diff --git a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsConverter.java b/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsConverter.java
deleted file mode 100644
index 9c03e74..0000000
--- a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsConverter.java
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.opennlp.wikinews_importer;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.net.URLEncoder;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
-
-import info.bliki.wiki.dump.IArticleFilter;
-import info.bliki.wiki.dump.Siteinfo;
-import info.bliki.wiki.dump.WikiArticle;
-import info.bliki.wiki.dump.WikiXMLParser;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.resource.metadata.TypeSystemDescription;
-import org.xml.sax.SAXException;
-
-/**
- * Demo application which reads an uncompressed Wikipedia XML dump
- * file and writes each article as an XMI file.
- */
-public class WikinewsConverter {
-
-  static class CASArticleFilter implements IArticleFilter {
-
-    private final TypeSystemDescription tsDesc;
-    private final File outputFolder;
-    private final List<String> endOfArticleMarkers = new ArrayList<>();
-    
-    CASArticleFilter(TypeSystemDescription tsDesc, File outputFolder) {
-      
-      this.tsDesc = tsDesc;
-      this.outputFolder = outputFolder;
-      
-      endOfArticleMarkers.add("{{haveyoursay}}");
-      endOfArticleMarkers.add("== Sources ==");
-      endOfArticleMarkers.add("==Sources==");
-      endOfArticleMarkers.add("== Source ==");
-      endOfArticleMarkers.add("==Source==");
-      endOfArticleMarkers.add("==References==");
-      endOfArticleMarkers.add("== References ==");
-      endOfArticleMarkers.add("=== References===");
-    }
-    
-    public static String titleToUri(String title) {
-      return URLEncoder.encode(title.replaceAll(" ", "_"), 
StandardCharsets.UTF_8);
-    }
-
-    @Override
-    public void process(WikiArticle page, Siteinfo siteinfo)
-        throws SAXException {
-      
-      if (page.getIntegerNamespace() == 0 && page.isMain()) {
-        if (page.getText().toLowerCase().contains("{publish}")) {
-          
-          String pageText = page.getText();
-          int cutIndex = pageText.length();
-
-          for (String endMarker : endOfArticleMarkers) {
-            int endMarkerIndex = pageText.indexOf(endMarker);
-              if (endMarkerIndex != -1 && endMarkerIndex < cutIndex) {
-                cutIndex = endMarkerIndex;
-              }
-          }
-          
-          if (cutIndex < pageText.length()) {
-            pageText = pageText.substring(0, cutIndex);
-          }
-          
-          WikinewsWikiModel wikiModel = new WikinewsWikiModel(
-                  "https://en.wikinews.org/wiki/${image}",
-                  "https://en.wikinews.org/wiki/${title}");
-          
-          AnnotatingMarkupParser converter = new AnnotatingMarkupParser();
-          String plainStr = wikiModel.render(converter, pageText);
-          
-          CAS articleCAS = UimaUtil.createEmptyCAS(tsDesc);
-          
-          // TODO: find a way to nicely add title ..
-          StringBuilder articleText = new StringBuilder();
-          articleText.append(page.getTitle());
-          
-          int endOffsetTitle = articleText.length();
-          
-          articleText.append("\n");
-          articleText.append("\n");
-          
-          int bodyOffset = articleText.length();
-          
-          articleText.append(plainStr); // Note: Add offset to annotations ... 
by this
-          
-          articleCAS.setDocumentLanguage("en");
-          articleCAS.setDocumentText(articleText.toString());
-          
-          AnnotationFS headlineAnnotation = 
articleCAS.createAnnotation(articleCAS.getTypeSystem()
-              .getType("org.apache.opennlp.annotations.Headline"),
-              0, endOffsetTitle);
-          
-          articleCAS.addFsToIndexes(headlineAnnotation);
-          
-          for (Annotation paraAnn : converter.getParagraphAnnotations()) {
-            AnnotationFS paraAnnFS = 
articleCAS.createAnnotation(articleCAS.getTypeSystem()
-                .getType("org.apache.opennlp.annotations.Paragraph"),
-                bodyOffset + paraAnn.begin, bodyOffset + paraAnn.end);
-            
-            articleCAS.addFsToIndexes(paraAnnFS);
-          }
-          
-          for (Annotation subHeadAnn : converter.getHeaderAnnotations()) {
-            AnnotationFS subHeadAnnFS = 
articleCAS.createAnnotation(articleCAS.getTypeSystem()
-                .getType("org.apache.opennlp.annotations.SubHeadline"),
-                bodyOffset + subHeadAnn.begin, bodyOffset + subHeadAnn.end);
-            
-            articleCAS.addFsToIndexes(subHeadAnnFS);
-          }
-          
-          Type wikiLinkType = articleCAS.getTypeSystem()
-              .getType("org.apache.opennlp.annotations.WikiLink");
-          Feature linkFeature = wikiLinkType.getFeatureByBaseName("link");
-          
-          for (Annotation wikiLinkAnn : converter.getWikiLinkAnnotations()) {
-            AnnotationFS wikiLinkAnnFS = 
articleCAS.createAnnotation(articleCAS.getTypeSystem()
-                .getType("org.apache.opennlp.annotations.WikiLink"),
-                bodyOffset + wikiLinkAnn.begin, bodyOffset + wikiLinkAnn.end);
-            
-            wikiLinkAnnFS.setStringValue(linkFeature, wikiLinkAnn.value);
-            
-            articleCAS.addFsToIndexes(wikiLinkAnnFS);
-          }
-          
-          CAS markupCas = articleCAS.createView("WikiMarkup");
-          markupCas.setDocumentText(page.toString());
-          
-          // now serialize CAS
-          try (OutputStream casOut = new 
FileOutputStream(outputFolder.getAbsolutePath() +
-                  File.separator + titleToUri(page.getTitle()) + ".xmi")) {
-
-              UimaUtil.serializeCASToXmi(articleCAS, casOut);
-          }
-          catch (IOException e) {
-            e.printStackTrace();
-          }
-        }
-      }
-    }
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 2) {
-      System.err.println("Usage: Parser <XML-File> <Output-Folder>"); 
-      System.exit(-1);
-    }
-    
-    // TODO: Should to be configurable!
-    TypeSystemDescription tsDesc = UimaUtil.createTypeSystemDescription(
-        new FileInputStream("samples/TypeSystem.xml"));
-
-    File outputFolder = new File(args[1]);
-    outputFolder.mkdirs();
-    
-    String bz2Filename = args[0];
-    try {
-      IArticleFilter handler = new CASArticleFilter(tsDesc, new File(args[1]));
-      WikiXMLParser wxp = new WikiXMLParser(bz2Filename, handler);
-      wxp.parse();
-    } catch (Exception e) {
-      System.out.println("Parsing the corpus failed:");
-      System.out.println();
-      e.printStackTrace();
-    }
-  }
-}
diff --git a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsWikiModel.java b/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsWikiModel.java
deleted file mode 100644
index df1592f..0000000
--- a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/WikinewsWikiModel.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.opennlp.wikinews_importer;
-
-import java.util.Map;
-
-import info.bliki.wiki.model.WikiModel;
-
-public class WikinewsWikiModel extends WikiModel {
-
-  public WikinewsWikiModel(String imageBaseURL, String linkBaseURL) {
-    super(imageBaseURL, linkBaseURL);
-  }
-
-  @Override
-  public String getRawWikiContent(String namespace, String articleName,
-      Map<String, String> map) {
-
-    String result = super.getRawWikiContent(namespace, articleName, map);
-    if (result == null) {
-      
-      // Maybe use special handling for date, ... ?!
-      
-      if (articleName.equals("w"))
-        return map.get("1");
-      
-      return "";
-    }
-    else {
-      return result;
-    }
-  }
-}

Reply via email to