[ https://issues.apache.org/jira/browse/CTAKES-74?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean Finan resolved CTAKES-74. ------------------------------ Fix Version/s: 4.0.1 Resolution: Fixed > Tokenizer PennTreeBank breaks with certain apostrophes in tokens. > ----------------------------------------------------------------- > > Key: CTAKES-74 > URL: https://issues.apache.org/jira/browse/CTAKES-74 > Project: cTAKES > Issue Type: Task > Components: ctakes-core > Affects Versions: 3.0-incubating > Reporter: Pei Chen > Priority: Critical > Fix For: future enhancement, 4.0.1 > > > The new TokenizerPTB breaks with certain apostrophes such as: > "N'theaster". This came out of 2.6 but should also exist in 2.5. > Sample Text to recreate: > Peis doctor appoitment was cancelled due to the N'theaster. > Exception: > 10/8/12 12:22:47 PM - 10: > org.apache.uima.tools.cvd.MainFrame.handleException(527): SEVERE: Annotator > processing failed. > org.apache.uima.analysis_engine.AnalysisEngineProcessException: Annotator > processing failed. > at > org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.callAnalysisComponentProcess(PrimitiveAnalysisEngine_impl.java:391) > at > org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.processAndOutputNewCASes(PrimitiveAnalysisEngine_impl.java:296) > at > org.apache.uima.analysis_engine.asb.impl.ASB_impl$AggregateCasIterator.processUntilNextOutputCas(ASB_impl.java:567) > at > org.apache.uima.analysis_engine.asb.impl.ASB_impl$AggregateCasIterator.<init>(ASB_impl.java:409) > at > org.apache.uima.analysis_engine.asb.impl.ASB_impl.process(ASB_impl.java:342) > at > org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.processAndOutputNewCASes(AggregateAnalysisEngine_impl.java:267) > at > org.apache.uima.analysis_engine.impl.AnalysisEngineImplBase.process(AnalysisEngineImplBase.java:267) > at > org.apache.uima.tools.cvd.MainFrame.internalRunAE(MainFrame.java:1526) > at org.apache.uima.tools.cvd.MainFrame.runAE(MainFrame.java:430) > at > 
org.apache.uima.tools.cvd.control.AnnotatorRerunEventHandler.actionPerformed(AnnotatorRerunEventHandler.java:40) > at javax.swing.AbstractButton.fireActionPerformed(Unknown Source) > at javax.swing.AbstractButton$Handler.actionPerformed(Unknown Source) > at javax.swing.DefaultButtonModel.fireActionPerformed(Unknown Source) > at javax.swing.DefaultButtonModel.setPressed(Unknown Source) > at javax.swing.AbstractButton.doClick(Unknown Source) > at javax.swing.AbstractButton.doClick(Unknown Source) > at > javax.swing.plaf.basic.BasicMenuItemUI$Actions.actionPerformed(Unknown Source) > at javax.swing.SwingUtilities.notifyAction(Unknown Source) > at javax.swing.JComponent.processKeyBinding(Unknown Source) > at javax.swing.JMenuBar.processBindingForKeyStrokeRecursive(Unknown > Source) > at javax.swing.JMenuBar.processBindingForKeyStrokeRecursive(Unknown > Source) > at javax.swing.JMenuBar.processBindingForKeyStrokeRecursive(Unknown > Source) > at javax.swing.JMenuBar.processKeyBinding(Unknown Source) > at javax.swing.KeyboardManager.fireBinding(Unknown Source) > at javax.swing.KeyboardManager.fireKeyboardAction(Unknown Source) > at javax.swing.JComponent.processKeyBindingsForAllComponents(Unknown > Source) > at javax.swing.JComponent.processKeyBindings(Unknown Source) > at javax.swing.JComponent.processKeyEvent(Unknown Source) > at java.awt.Component.processEvent(Unknown Source) > at java.awt.Container.processEvent(Unknown Source) > at java.awt.Component.dispatchEventImpl(Unknown Source) > at java.awt.Container.dispatchEventImpl(Unknown Source) > at java.awt.Component.dispatchEvent(Unknown Source) > at java.awt.KeyboardFocusManager.redispatchEvent(Unknown Source) > at java.awt.DefaultKeyboardFocusManager.dispatchKeyEvent(Unknown Source) > at java.awt.DefaultKeyboardFocusManager.preDispatchKeyEvent(Unknown > Source) > at java.awt.DefaultKeyboardFocusManager.typeAheadAssertions(Unknown > Source) > at java.awt.DefaultKeyboardFocusManager.dispatchEvent(Unknown Source) > at 
java.awt.Component.dispatchEventImpl(Unknown Source) > at java.awt.Container.dispatchEventImpl(Unknown Source) > at java.awt.Window.dispatchEventImpl(Unknown Source) > at java.awt.Component.dispatchEvent(Unknown Source) > at java.awt.EventQueue.dispatchEventImpl(Unknown Source) > at java.awt.EventQueue.access$000(Unknown Source) > at java.awt.EventQueue$1.run(Unknown Source) > at java.awt.EventQueue$1.run(Unknown Source) > at java.security.AccessController.doPrivileged(Native Method) > at java.security.AccessControlContext$1.doIntersectionPrivilege(Unknown > Source) > at java.security.AccessControlContext$1.doIntersectionPrivilege(Unknown > Source) > at java.awt.EventQueue$2.run(Unknown Source) > at java.awt.EventQueue$2.run(Unknown Source) > at java.security.AccessController.doPrivileged(Native Method) > at java.security.AccessControlContext$1.doIntersectionPrivilege(Unknown > Source) > at java.awt.EventQueue.dispatchEvent(Unknown Source) > at java.awt.EventDispatchThread.pumpOneEventForFilters(Unknown Source) > at java.awt.EventDispatchThread.pumpEventsForFilter(Unknown Source) > at java.awt.EventDispatchThread.pumpEventsForHierarchy(Unknown Source) > at java.awt.EventDispatchThread.pumpEvents(Unknown Source) > at java.awt.EventDispatchThread.pumpEvents(Unknown Source) > at java.awt.EventDispatchThread.run(Unknown Source) > Caused by: java.lang.StringIndexOutOfBoundsException: String index out of > range: 0 > at java.lang.String.charAt(Unknown Source) > at > org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB.setNumPosition(TokenizerPTB.java:1148) > at > org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB.createToken(TokenizerPTB.java:1051) > at > org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB.tokenizeTextSegment(TokenizerPTB.java:348) > at > org.apache.ctakes.core.ae.TokenizerAnnotatorPTB.annotateRange(TokenizerAnnotatorPTB.java:173) > at > org.apache.ctakes.core.ae.TokenizerAnnotatorPTB.process(TokenizerAnnotatorPTB.java:117) > at > 
org.apache.uima.analysis_component.JCasAnnotator_ImplBase.process(JCasAnnotator_ImplBase.java:48) > at > org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.callAnalysisComponentProcess(PrimitiveAnalysisEngine_impl.java:375) > ... 59 more -- This message was sent by Atlassian Jira (v8.20.10#820010)