001 /*
002 Copyright (c) 2012, Regents of the University of Colorado
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without modification,
006 are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this
009 list of conditions and the following disclaimer.
010
011 * Redistributions in binary form must reproduce the above copyright notice,
012 this list of conditions and the following disclaimer in the documentation
013 and/or other materials provided with the distribution.
014
015 * Neither the name of the University of Colorado nor the names of its
016 contributors may be used to endorse or promote products derived from this
017 software without specific prior written permission.
018
019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
023 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030 package edu.ucdenver.ccp.nlp.biolemmatizer.uima;
031
032 import java.util.ArrayList;
033 import java.util.List;
034
035 import org.apache.uima.cas.CASException;
036 import org.apache.uima.fit.util.JCasUtil;
037 import org.apache.uima.jcas.JCas;
038 import org.apache.uima.jcas.cas.FSArray;
039 import org.apache.uima.jcas.cas.StringArray;
040 import org.apache.uima.jcas.tcas.Annotation;
041
042 import edu.ucdenver.ccp.uima.shims.annotation.AnnotationDataExtractor;
043 import edu.ucdenver.ccp.uima.shims.annotation.Span;
044 import edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.Lemma;
045 import edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.LemmaDecorator;
046
047 /**
048 * This simple implementation of the {@link LemmaDecorator} interface adds a new
049 * {@link LemmaAnnotation} to the CAS for each token annotation processed.
050 *
051 * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
052 *
053 */
054 public class DefaultLemmaDecorator implements LemmaDecorator {
055
056 /**
057 * @return an initialized {@link LemmaAnnotation}
058 * @see edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#newAnnotation(org.apache.uima.jcas.JCas,
059 * java.lang.String, edu.ucdenver.ccp.uima.shims.annotation.Span)
060 */
061 @Override
062 public Annotation newAnnotation(JCas jcas, @SuppressWarnings("unused") String type, Span span) {
063 LemmaAnnotation lemmaAnnot = new LemmaAnnotation(jcas, span.getSpanStart(), span.getSpanEnd());
064 lemmaAnnot.addToIndexes();
065 return lemmaAnnot;
066 }
067
068 /**
069 * In the case of the {@link DefaultLemmaDecorator}, the annotation to decorate is a
070 * {@link LemmaAnnotation} with the same span as the token that was used to process the lemma.
071 * This method looks to see if that {@link LemmaAnnotation} exists. If it does it is returned.
072 * If it does not exist, then a new {@link LemmaAnnotation} is created and then returned.
073 *
074 * @param tokenAnnotation
075 * in this case, the input annotation represents the token annotation whose covered
076 * text was lemmatized
077 *
078 * @see edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#getAnnotationToDecorate(org.apache.uima.jcas.tcas.Annotation,
079 * edu.ucdenver.ccp.uima.shims.annotation.AnnotationDataExtractor)
080 */
081 @Override
082 public Annotation getAnnotationToDecorate(Annotation tokenAnnotation,
083 @SuppressWarnings("unused") AnnotationDataExtractor annotationDataExtractor) {
084 JCas jCas = null;
085 try {
086 jCas = tokenAnnotation.getCAS().getJCas();
087 } catch (CASException e) {
088 throw new IllegalStateException(e);
089 }
090 List<LemmaAnnotation> existingLemmaAnnotations = JCasUtil.selectCovered(jCas, LemmaAnnotation.class,
091 tokenAnnotation);
092 if (existingLemmaAnnotations.isEmpty()) {
093 return newAnnotation(jCas, null, new Span(tokenAnnotation.getBegin(), tokenAnnotation.getEnd()));
094 }
095 if (existingLemmaAnnotations.size() == 1) {
096 return existingLemmaAnnotations.get(0);
097 }
098 throw new IllegalStateException("Multiple LemmaAnnotations covering: " + tokenAnnotation.toString(0));
099 }
100
101 /**
102 *
103 * @see edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#decorateAnnotation(org.apache
104 * .uima.jcas.tcas.Annotation, java.lang.String, java.lang.Object)
105 */
106 @Override
107 public void decorateAnnotation(Annotation annotation, @SuppressWarnings("unused") String attributeType, Lemma lemma) {
108 insertLemma(annotation, lemma);
109 }
110
111 /**
112 * Inserts information representing the input {@link Lemma} into the input {@link Annotation}
113 * which is assumed to be of type {@link LemmaAnnotation} in this instance.
114 *
115 * @throws IllegalArgumentException
116 * if the input {@link Annotation} is not a {@link LemmaAnnotation}
117 *
118 * @see edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.LemmaDecorator#insertLemma(org.apache.uima.jcas.tcas.Annotation,
119 * edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.Lemma)
120 */
121 @Override
122 public void insertLemma(Annotation annotation, Lemma lemma) {
123 checkAnnotationType(annotation);
124 LemmaAnnotation lemmaAnnot = (LemmaAnnotation) annotation;
125 try {
126 addLemma(lemmaAnnot, lemma);
127 } catch (CASException e) {
128 throw new IllegalStateException(e);
129 }
130 }
131
132 /**
133 * Checks that the input {@link Annotation} is a {@link LemmaAnnotation}
134 *
135 * @param annotation
136 * @throws IllegalArgumentException
137 * if the input {@link Annotation} is not a {@link LemmaAnnotation}
138 *
139 */
140 private static void checkAnnotationType(Annotation annotation) {
141 if (!(annotation instanceof LemmaAnnotation)) {
142 throw new IllegalArgumentException(
143 "Expecting LemmaAnnotation class. Unable to assign lemma information to annotation of type: "
144 + annotation.getClass().getName());
145 }
146 }
147
148 /**
149 * Transfers information from the input {@link Lemma} to the input {@link LemmaAnnotation},
150 * specifically the lemmatized string and its accompanying part-of-speech
151 *
152 * @param lemmaAnnot
153 * @param lemma
154 * @throws CASException
155 * if the {@link JCas} is not retrievable from the input {@link LemmaAnnotation}
156 */
157 private static void addLemma(LemmaAnnotation lemmaAnnot, Lemma lemma) throws CASException {
158 JCas jCas = lemmaAnnot.getCAS().getJCas();
159 StringArray lemmas = UimaUtil.addToStringArray(lemmaAnnot.getLemmas(), lemma.getLemma(), jCas);
160 PartOfSpeech pos = getPartOfSpeech(lemma, jCas);
161 FSArray partsOfSpeech = UimaUtil.addToFSArray(lemmaAnnot.getPartsOfSpeech(), pos, jCas);
162 lemmaAnnot.setLemmas(lemmas);
163 lemmaAnnot.setPartsOfSpeech(partsOfSpeech);
164 }
165
166 /**
167 * @param lemmaAnnot
168 * @return a List of {@link Lemma} objects that were found in the input {@link LemmaAnnotation}
169 */
170 private static List<Lemma> extractLemmas(LemmaAnnotation lemmaAnnot) {
171 List<Lemma> lemmasToReturn = new ArrayList<Lemma>();
172 StringArray lemmas = lemmaAnnot.getLemmas();
173 FSArray partsOfSpeech = lemmaAnnot.getPartsOfSpeech();
174 for (int i = 0; i < lemmas.size(); i++) {
175 String lemmaStr = lemmas.get(i);
176 PartOfSpeech pos = (PartOfSpeech) partsOfSpeech.get(i);
177 lemmasToReturn.add(new Lemma(lemmaStr, pos.getPosTag(), pos.getTagSetName()));
178 }
179 return lemmasToReturn;
180 }
181
182 /**
183 * @param lemma
184 * @param jCas
185 * @return a {@link PartOfSpeech} object initialized from the input {@link Lemma}
186 */
187 private static PartOfSpeech getPartOfSpeech(Lemma lemma, JCas jCas) {
188 PartOfSpeech pos = new PartOfSpeech(jCas);
189 pos.setPosTag(lemma.getPos().getPosTag());
190 pos.setTagSetName(lemma.getPos().getTagSetName());
191 return pos;
192 }
193
194 /*
195 * (non-Javadoc)
196 *
197 * @see
198 * edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#extractAttribute(org.apache.uima
199 * .jcas.tcas.Annotation, java.lang.String)
200 */
201 @Override
202 public List<Lemma> extractAttribute(Annotation annotation, String attributeType) {
203 return extractLemmas(annotation);
204 }
205
206 /*
207 * (non-Javadoc)
208 *
209 * @see
210 * edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.LemmaDecorator#extractLemma(org.apache
211 * .uima.jcas.tcas.Annotation)
212 */
213 @Override
214 public List<Lemma> extractLemmas(Annotation annotation) {
215 checkAnnotationType(annotation);
216 LemmaAnnotation lemmaAnnot = (LemmaAnnotation) annotation;
217 return extractLemmas(lemmaAnnot);
218 }
219
220 }