001 /*
002 Copyright (c) 2012, Regents of the University of Colorado
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without modification,
006 are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this
009 list of conditions and the following disclaimer.
010
011 * Redistributions in binary form must reproduce the above copyright notice,
012 this list of conditions and the following disclaimer in the documentation
013 and/or other materials provided with the distribution.
014
015 * Neither the name of the University of Colorado nor the names of its
016 contributors may be used to endorse or promote products derived from this
017 software without specific prior written permission.
018
019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
023 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030 package edu.ucdenver.ccp.nlp.biolemmatizer;
031
032 import java.util.Collection;
033 import java.util.HashSet;
034 import java.util.Map;
035 import java.util.Map.Entry;
036 import java.util.Set;
037
038 /** LemmataEntry: store POS tags and corresponding lemmas for one lemmata entry */
039 public class LemmataEntry {
040 /**
041 * Map to store lemmata info. A POS tag is the key, and the corresponding lemma is the value.
042 */
043 public Map<String, String> lemmasAndCategories;
044
045 /** Lemma separator character */
046 public static String lemmaSeparator = "||";
047
048 /**
049 * Provides mappings from POS tags to a corresponding tag set name
050 */
051 private final POSEntry posEntry;
052
053 /**
054 * Construtor to initialize the class field
055 *
056 * @param lemmasAndCategories
057 * a Map object that stores lemmata info
058 * @param posEntry
059 * provides mappings from POS tags to a corresponding tag set name
060 *
061 */
062 public LemmataEntry(Map<String, String> lemmasAndCategories, POSEntry posEntry) {
063 this.lemmasAndCategories = lemmasAndCategories;
064 this.posEntry = posEntry;
065 }
066
067 /**
068 * Override toString() method to represent lemma and POS info in a concatenated triplet;
069 * Different lemmas are separated by lemmaSeparator
070 */
071 @Override
072 public String toString() {
073 String lemmas = "*";
074
075 if (!lemmasAndCategories.isEmpty()) {
076 lemmas = "";
077 String lemma;
078 int i = 0;
079 for (String key : lemmasAndCategories.keySet()) {
080 lemma = lemmasAndCategories.get(key) + " " + key + " " + posEntry.getTagSetLabel(key);
081
082 lemmas += lemma;
083
084 if (i < lemmasAndCategories.keySet().size() - 1) {
085 lemmas = lemmas + lemmaSeparator;
086 }
087 i++;
088 }
089 }
090
091 return lemmas;
092 }
093
094 /**
095 * Represent lemmas of different POS tags; separated by lemmaSeparator
096 *
097 * @return string representation of lemma info
098 */
099 public String lemmasToString() {
100 String lemmas = "*";
101
102 if (!lemmasAndCategories.isEmpty()) {
103 lemmas = "";
104 String lemma;
105 int i = 0;
106
107 // remove duplicate lemmas
108 Set<String> checkSet = new HashSet<String>();
109 for (String key : lemmasAndCategories.keySet()) {
110 lemma = lemmasAndCategories.get(key);
111 if (!checkSet.contains(lemma))
112 checkSet.add(lemma);
113 }
114
115 for (String setItem : checkSet) {
116 lemmas += setItem;
117 if (i < checkSet.size() - 1)
118 lemmas = lemmas + lemmaSeparator;
119 i++;
120 }
121 }
122
123 return lemmas;
124 }
125
126 /**
127 * @return a {@link Collection} of unique {@link Lemma} objects
128 */
129 public Collection<Lemma> getLemmas() {
130 Set<Lemma> lemmas = new HashSet<Lemma>();
131 for (Entry<String, String> entry : lemmasAndCategories.entrySet()) {
132 String posTag = entry.getKey();
133 String lemma = entry.getValue();
134 String tagSetName = posEntry.getTagSetLabel(posTag);
135 lemmas.add(new Lemma(lemma, posTag, tagSetName));
136 }
137 return lemmas;
138 }
139
140 /**
141 * Simple utility class to store a single lemma/pos combination
142 *
143 * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
144 *
145 */
146 public static class Lemma {
147 private final String lemma;
148 private final String pos;
149 private final String tagSetName;
150
151 /**
152 * @param lemma
153 * @param pos
154 * @param tagSetName
155 */
156 public Lemma(String lemma, String pos, String tagSetName) {
157 super();
158 this.lemma = lemma;
159 this.pos = pos;
160 this.tagSetName = tagSetName;
161 }
162
163 /**
164 * @return the lemma
165 */
166 public String getLemma() {
167 return lemma;
168 }
169
170 /**
171 * @return the pos
172 */
173 public String getPos() {
174 return pos;
175 }
176
177 /**
178 * @return the tagSetName
179 */
180 public String getTagSetName() {
181 return tagSetName;
182 }
183
184 /*
185 * (non-Javadoc)
186 *
187 * @see java.lang.Object#hashCode()
188 */
189 @Override
190 public int hashCode() {
191 final int prime = 31;
192 int result = 1;
193 result = prime * result + ((lemma == null) ? 0 : lemma.hashCode());
194 result = prime * result + ((pos == null) ? 0 : pos.hashCode());
195 result = prime * result + ((tagSetName == null) ? 0 : tagSetName.hashCode());
196 return result;
197 }
198
199 /*
200 * (non-Javadoc)
201 *
202 * @see java.lang.Object#equals(java.lang.Object)
203 */
204 @Override
205 public boolean equals(Object obj) {
206 if (this == obj)
207 return true;
208 if (obj == null)
209 return false;
210 if (getClass() != obj.getClass())
211 return false;
212 Lemma other = (Lemma) obj;
213 if (lemma == null) {
214 if (other.lemma != null)
215 return false;
216 } else if (!lemma.equals(other.lemma))
217 return false;
218 if (pos == null) {
219 if (other.pos != null)
220 return false;
221 } else if (!pos.equals(other.pos))
222 return false;
223 if (tagSetName == null) {
224 if (other.tagSetName != null)
225 return false;
226 } else if (!tagSetName.equals(other.tagSetName))
227 return false;
228 return true;
229 }
230
231 }
232
233 }