001    /*
002     Copyright (c) 2012, Regents of the University of Colorado
003     All rights reserved.
004    
005     Redistribution and use in source and binary forms, with or without modification, 
006     are permitted provided that the following conditions are met:
007    
008     * Redistributions of source code must retain the above copyright notice, this 
009        list of conditions and the following disclaimer.
010       
011     * Redistributions in binary form must reproduce the above copyright notice, 
012        this list of conditions and the following disclaimer in the documentation 
013        and/or other materials provided with the distribution.
014       
015     * Neither the name of the University of Colorado nor the names of its 
016        contributors may be used to endorse or promote products derived from this 
017        software without specific prior written permission.
018    
019     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
022     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
023     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
024     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
025     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
026     ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
027     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
028     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029     */
030    package edu.ucdenver.ccp.nlp.biolemmatizer.uima;
031    
032    import java.util.ArrayList;
033    import java.util.List;
034    
035    import org.apache.uima.cas.CASException;
036    import org.apache.uima.fit.util.JCasUtil;
037    import org.apache.uima.jcas.JCas;
038    import org.apache.uima.jcas.cas.FSArray;
039    import org.apache.uima.jcas.cas.StringArray;
040    import org.apache.uima.jcas.tcas.Annotation;
041    
042    import edu.ucdenver.ccp.uima.shims.annotation.AnnotationDataExtractor;
043    import edu.ucdenver.ccp.uima.shims.annotation.Span;
044    import edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.Lemma;
045    import edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.LemmaDecorator;
046    
047    /**
048     * This simple implementation of the {@link LemmaDecorator} interface adds a new
049     * {@link LemmaAnnotation} to the CAS for each token annotation processed. 
050     * 
051     * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
052     * 
053     */
054    public class DefaultLemmaDecorator implements LemmaDecorator {
055    
056            /**
057             * @return an initialized {@link LemmaAnnotation}
058             * @see edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#newAnnotation(org.apache.uima.jcas.JCas,
059             *      java.lang.String, edu.ucdenver.ccp.uima.shims.annotation.Span)
060             */
061            @Override
062            public Annotation newAnnotation(JCas jcas, @SuppressWarnings("unused") String type, Span span) {
063                    LemmaAnnotation lemmaAnnot = new LemmaAnnotation(jcas, span.getSpanStart(), span.getSpanEnd());
064                    lemmaAnnot.addToIndexes();
065                    return lemmaAnnot;
066            }
067    
068            /**
069             * In the case of the {@link DefaultLemmaDecorator}, the annotation to decorate is a
070             * {@link LemmaAnnotation} with the same span as the token that was used to process the lemma.
071             * This method looks to see if that {@link LemmaAnnotation} exists. If it does it is returned.
072             * If it does not exist, then a new {@link LemmaAnnotation} is created and then returned.
073             * 
074             * @param tokenAnnotation
075             *            in this case, the input annotation represents the token annotation whose covered
076             *            text was lemmatized
077             * 
078             * @see edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#getAnnotationToDecorate(org.apache.uima.jcas.tcas.Annotation,
079             *      edu.ucdenver.ccp.uima.shims.annotation.AnnotationDataExtractor)
080             */
081            @Override
082            public Annotation getAnnotationToDecorate(Annotation tokenAnnotation,
083                            @SuppressWarnings("unused") AnnotationDataExtractor annotationDataExtractor) {
084                    JCas jCas = null;
085                    try {
086                            jCas = tokenAnnotation.getCAS().getJCas();
087                    } catch (CASException e) {
088                            throw new IllegalStateException(e);
089                    }
090                    List<LemmaAnnotation> existingLemmaAnnotations = JCasUtil.selectCovered(jCas, LemmaAnnotation.class,
091                                    tokenAnnotation);
092                    if (existingLemmaAnnotations.isEmpty()) {
093                            return newAnnotation(jCas, null, new Span(tokenAnnotation.getBegin(), tokenAnnotation.getEnd()));
094                    }
095                    if (existingLemmaAnnotations.size() == 1) {
096                            return existingLemmaAnnotations.get(0);
097                    }
098                    throw new IllegalStateException("Multiple LemmaAnnotations covering: " + tokenAnnotation.toString(0));
099            }
100    
101            /**
102             * 
103             * @see edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#decorateAnnotation(org.apache
104             *      .uima.jcas.tcas.Annotation, java.lang.String, java.lang.Object)
105             */
106            @Override
107            public void decorateAnnotation(Annotation annotation, @SuppressWarnings("unused") String attributeType, Lemma lemma) {
108                    insertLemma(annotation, lemma);
109            }
110    
111            /**
112             * Inserts information representing the input {@link Lemma} into the input {@link Annotation}
113             * which is assumed to be of type {@link LemmaAnnotation} in this instance.
114             * 
115             * @throws IllegalArgumentException
116             *             if the input {@link Annotation} is not a {@link LemmaAnnotation}
117             * 
118             * @see edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.LemmaDecorator#insertLemma(org.apache.uima.jcas.tcas.Annotation,
119             *      edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.Lemma)
120             */
121            @Override
122            public void insertLemma(Annotation annotation, Lemma lemma) {
123                    checkAnnotationType(annotation);
124                    LemmaAnnotation lemmaAnnot = (LemmaAnnotation) annotation;
125                    try {
126                            addLemma(lemmaAnnot, lemma);
127                    } catch (CASException e) {
128                            throw new IllegalStateException(e);
129                    }
130            }
131    
132            /**
133             * Checks that the input {@link Annotation} is a {@link LemmaAnnotation}
134             * 
135             * @param annotation
136             * @throws IllegalArgumentException
137             *             if the input {@link Annotation} is not a {@link LemmaAnnotation}
138             * 
139             */
140            private static void checkAnnotationType(Annotation annotation) {
141                    if (!(annotation instanceof LemmaAnnotation)) {
142                            throw new IllegalArgumentException(
143                                            "Expecting LemmaAnnotation class. Unable to assign lemma information to annotation of type: "
144                                                            + annotation.getClass().getName());
145                    }
146            }
147    
148            /**
149             * Transfers information from the input {@link Lemma} to the input {@link LemmaAnnotation},
150             * specifically the lemmatized string and its accompanying part-of-speech
151             * 
152             * @param lemmaAnnot
153             * @param lemma
154             * @throws CASException
155             *             if the {@link JCas} is not retrievable from the input {@link LemmaAnnotation}
156             */
157            private static void addLemma(LemmaAnnotation lemmaAnnot, Lemma lemma) throws CASException {
158                    JCas jCas = lemmaAnnot.getCAS().getJCas();
159                    StringArray lemmas = UimaUtil.addToStringArray(lemmaAnnot.getLemmas(), lemma.getLemma(), jCas);
160                    PartOfSpeech pos = getPartOfSpeech(lemma, jCas);
161                    FSArray partsOfSpeech = UimaUtil.addToFSArray(lemmaAnnot.getPartsOfSpeech(), pos, jCas);
162                    lemmaAnnot.setLemmas(lemmas);
163                    lemmaAnnot.setPartsOfSpeech(partsOfSpeech);
164            }
165    
166            /**
167             * @param lemmaAnnot
168             * @return a List of {@link Lemma} objects that were found in the input {@link LemmaAnnotation}
169             */
170            private static List<Lemma> extractLemmas(LemmaAnnotation lemmaAnnot) {
171                    List<Lemma> lemmasToReturn = new ArrayList<Lemma>();
172                    StringArray lemmas = lemmaAnnot.getLemmas();
173                    FSArray partsOfSpeech = lemmaAnnot.getPartsOfSpeech();
174                    for (int i = 0; i < lemmas.size(); i++) {
175                            String lemmaStr = lemmas.get(i);
176                            PartOfSpeech pos = (PartOfSpeech) partsOfSpeech.get(i);
177                            lemmasToReturn.add(new Lemma(lemmaStr, pos.getPosTag(), pos.getTagSetName()));
178                    }
179                    return lemmasToReturn;
180            }
181    
182            /**
183             * @param lemma
184             * @param jCas
185             * @return a {@link PartOfSpeech} object initialized from the input {@link Lemma}
186             */
187            private static PartOfSpeech getPartOfSpeech(Lemma lemma, JCas jCas) {
188                    PartOfSpeech pos = new PartOfSpeech(jCas);
189                    pos.setPosTag(lemma.getPos().getPosTag());
190                    pos.setTagSetName(lemma.getPos().getTagSetName());
191                    return pos;
192            }
193    
194            /*
195             * (non-Javadoc)
196             * 
197             * @see
198             * edu.ucdenver.ccp.uima.shims.annotation.AnnotationDecorator#extractAttribute(org.apache.uima
199             * .jcas.tcas.Annotation, java.lang.String)
200             */
201            @Override
202            public List<Lemma> extractAttribute(Annotation annotation, String attributeType) {
203                    return extractLemmas(annotation);
204            }
205    
206            /*
207             * (non-Javadoc)
208             * 
209             * @see
210             * edu.ucdenver.ccp.uima.shims.annotation.syntactic.token.LemmaDecorator#extractLemma(org.apache
211             * .uima.jcas.tcas.Annotation)
212             */
213            @Override
214            public List<Lemma> extractLemmas(Annotation annotation) {
215                    checkAnnotationType(annotation);
216                    LemmaAnnotation lemmaAnnot = (LemmaAnnotation) annotation;
217                    return extractLemmas(lemmaAnnot);
218            }
219    
220    }