001    /*
002     Copyright (c) 2012, Regents of the University of Colorado
003     All rights reserved.
004    
005     Redistribution and use in source and binary forms, with or without modification, 
006     are permitted provided that the following conditions are met:
007    
008      * Redistributions of source code must retain the above copyright notice, this 
009        list of conditions and the following disclaimer.
010       
011      * Redistributions in binary form must reproduce the above copyright notice, 
012        this list of conditions and the following disclaimer in the documentation 
013        and/or other materials provided with the distribution.
014       
015      * Neither the name of the University of Colorado nor the names of its 
016        contributors may be used to endorse or promote products derived from this 
017        software without specific prior written permission.
018    
019     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
022     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
023     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
024     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
025     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
026     ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
027     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
028     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029     */
030    package edu.ucdenver.ccp.nlp.biolemmatizer;
031    
032    import java.util.Collection;
033    import java.util.HashSet;
034    import java.util.Map;
035    import java.util.Map.Entry;
036    import java.util.Set;
037    
038    /** LemmataEntry: store POS tags and corresponding lemmas for one lemmata entry */
039    public class LemmataEntry {
040            /**
041             * Map to store lemmata info. A POS tag is the key, and the corresponding lemma is the value.
042             */
043            public Map<String, String> lemmasAndCategories;
044    
045            /** Lemma separator character */
046            public static String lemmaSeparator = "||";
047    
048            /**
049             * Provides mappings from POS tags to a corresponding tag set name
050             */
051            private final POSEntry posEntry;
052    
053            /**
054             * Construtor to initialize the class field
055             * 
056             * @param lemmasAndCategories
057             *            a Map object that stores lemmata info
058             * @param posEntry
059             *            provides mappings from POS tags to a corresponding tag set name
060             * 
061             */
062            public LemmataEntry(Map<String, String> lemmasAndCategories, POSEntry posEntry) {
063                    this.lemmasAndCategories = lemmasAndCategories;
064                    this.posEntry = posEntry;
065            }
066    
067            /**
068             * Override toString() method to represent lemma and POS info in a concatenated triplet;
069             * Different lemmas are separated by lemmaSeparator
070             */
071            @Override
072            public String toString() {
073                    String lemmas = "*";
074    
075                    if (!lemmasAndCategories.isEmpty()) {
076                            lemmas = "";
077                            String lemma;
078                            int i = 0;
079                            for (String key : lemmasAndCategories.keySet()) {
080                                    lemma = lemmasAndCategories.get(key) + " " + key + " " + posEntry.getTagSetLabel(key);
081    
082                                    lemmas += lemma;
083    
084                                    if (i < lemmasAndCategories.keySet().size() - 1) {
085                                            lemmas = lemmas + lemmaSeparator;
086                                    }
087                                    i++;
088                            }
089                    }
090    
091                    return lemmas;
092            }
093    
094            /**
095             * Represent lemmas of different POS tags; separated by lemmaSeparator
096             * 
097             * @return string representation of lemma info
098             */
099            public String lemmasToString() {
100                    String lemmas = "*";
101    
102                    if (!lemmasAndCategories.isEmpty()) {
103                            lemmas = "";
104                            String lemma;
105                            int i = 0;
106    
107                            // remove duplicate lemmas
108                            Set<String> checkSet = new HashSet<String>();
109                            for (String key : lemmasAndCategories.keySet()) {
110                                    lemma = lemmasAndCategories.get(key);
111                                    if (!checkSet.contains(lemma))
112                                            checkSet.add(lemma);
113                            }
114    
115                            for (String setItem : checkSet) {
116                                    lemmas += setItem;
117                                    if (i < checkSet.size() - 1)
118                                            lemmas = lemmas + lemmaSeparator;
119                                    i++;
120                            }
121                    }
122    
123                    return lemmas;
124            }
125    
126            /**
127             * @return a {@link Collection} of unique {@link Lemma} objects
128             */
129            public Collection<Lemma> getLemmas() {
130                    Set<Lemma> lemmas = new HashSet<Lemma>();
131                    for (Entry<String, String> entry : lemmasAndCategories.entrySet()) {
132                            String posTag = entry.getKey();
133                            String lemma = entry.getValue();
134                            String tagSetName = posEntry.getTagSetLabel(posTag);
135                            lemmas.add(new Lemma(lemma, posTag, tagSetName));
136                    }
137                    return lemmas;
138            }
139    
140            /**
141             * Simple utility class to store a single lemma/pos combination
142             * 
143             * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
144             * 
145             */
146            public static class Lemma {
147                    private final String lemma;
148                    private final String pos;
149                    private final String tagSetName;
150    
151                    /**
152                     * @param lemma
153                     * @param pos
154                     * @param tagSetName
155                     */
156                    public Lemma(String lemma, String pos, String tagSetName) {
157                            super();
158                            this.lemma = lemma;
159                            this.pos = pos;
160                            this.tagSetName = tagSetName;
161                    }
162    
163                    /**
164                     * @return the lemma
165                     */
166                    public String getLemma() {
167                            return lemma;
168                    }
169    
170                    /**
171                     * @return the pos
172                     */
173                    public String getPos() {
174                            return pos;
175                    }
176    
177                    /**
178                     * @return the tagSetName
179                     */
180                    public String getTagSetName() {
181                            return tagSetName;
182                    }
183    
184                    /*
185                     * (non-Javadoc)
186                     * 
187                     * @see java.lang.Object#hashCode()
188                     */
189                    @Override
190                    public int hashCode() {
191                            final int prime = 31;
192                            int result = 1;
193                            result = prime * result + ((lemma == null) ? 0 : lemma.hashCode());
194                            result = prime * result + ((pos == null) ? 0 : pos.hashCode());
195                            result = prime * result + ((tagSetName == null) ? 0 : tagSetName.hashCode());
196                            return result;
197                    }
198    
199                    /*
200                     * (non-Javadoc)
201                     * 
202                     * @see java.lang.Object#equals(java.lang.Object)
203                     */
204                    @Override
205                    public boolean equals(Object obj) {
206                            if (this == obj)
207                                    return true;
208                            if (obj == null)
209                                    return false;
210                            if (getClass() != obj.getClass())
211                                    return false;
212                            Lemma other = (Lemma) obj;
213                            if (lemma == null) {
214                                    if (other.lemma != null)
215                                            return false;
216                            } else if (!lemma.equals(other.lemma))
217                                    return false;
218                            if (pos == null) {
219                                    if (other.pos != null)
220                                            return false;
221                            } else if (!pos.equals(other.pos))
222                                    return false;
223                            if (tagSetName == null) {
224                                    if (other.tagSetName != null)
225                                            return false;
226                            } else if (!tagSetName.equals(other.tagSetName))
227                                    return false;
228                            return true;
229                    }
230    
231            }
232    
233    }