/*
 Copyright (c) 2012, Regents of the University of Colorado
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
    list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 * Neither the name of the University of Colorado nor the names of its
    contributors may be used to endorse or promote products derived from this
    software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package edu.ucdenver.ccp.nlp.biolemmatizer;

import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

/**
 * LemmataEntry: stores the POS tags and corresponding lemmas for one lemmata entry, and renders
 * them either as strings or as a collection of {@link Lemma} objects.
 */
public class LemmataEntry {
	/**
	 * Map to store lemmata info. A POS tag is the key, and the corresponding lemma is the value.
	 */
	public Map<String, String> lemmasAndCategories;

	/** Separator placed between lemmas in the string representations */
	public static String lemmaSeparator = "||";

	/**
	 * Provides mappings from POS tags to a corresponding tag set name
	 */
	private final POSEntry posEntry;

	/**
	 * Constructor to initialize the class fields. The map is stored by reference, not copied.
	 * 
	 * @param lemmasAndCategories
	 *            a Map object that stores lemmata info (POS tag to lemma)
	 * @param posEntry
	 *            provides mappings from POS tags to a corresponding tag set name
	 */
	public LemmataEntry(Map<String, String> lemmasAndCategories, POSEntry posEntry) {
		this.lemmasAndCategories = lemmasAndCategories;
		this.posEntry = posEntry;
	}

	/**
	 * Represents every map entry as the space-separated triplet "lemma POS tagSetLabel"; the
	 * triplets are joined with {@link #lemmaSeparator} in the map's iteration order.
	 * 
	 * @return the joined triplets, or "*" when the map is empty
	 */
	@Override
	public String toString() {
		if (lemmasAndCategories.isEmpty()) {
			return "*";
		}

		StringBuilder joined = new StringBuilder();
		boolean first = true;
		for (Entry<String, String> entry : lemmasAndCategories.entrySet()) {
			// a flag (not the builder length) decides when a separator is needed, so the
			// output does not depend on the content of the pieces
			if (!first) {
				joined.append(lemmaSeparator);
			}
			first = false;

			String posTag = entry.getKey();
			joined.append(entry.getValue()).append(' ').append(posTag).append(' ')
					.append(posEntry.getTagSetLabel(posTag));
		}
		return joined.toString();
	}

	/**
	 * Represents the distinct lemmas of all POS tags, joined with {@link #lemmaSeparator}.
	 * Duplicates are removed through a {@link HashSet}, so the lemmas appear in that set's
	 * iteration order rather than in the map's order.
	 * 
	 * @return string representation of lemma info, or "*" when the map is empty
	 */
	public String lemmasToString() {
		if (lemmasAndCategories.isEmpty()) {
			return "*";
		}

		// remove duplicate lemmas; Set.add already ignores values that are present
		Set<String> uniqueLemmas = new HashSet<String>(lemmasAndCategories.values());

		StringBuilder joined = new StringBuilder();
		boolean first = true;
		for (String lemma : uniqueLemmas) {
			// flag-based separator handling keeps an empty-string lemma from swallowing the
			// separator that follows it
			if (!first) {
				joined.append(lemmaSeparator);
			}
			first = false;
			joined.append(lemma);
		}
		return joined.toString();
	}

	/**
	 * Builds one {@link Lemma} per map entry, pairing the lemma and POS tag with the tag set
	 * label reported by the {@link POSEntry}.
	 * 
	 * @return a {@link Collection} of unique {@link Lemma} objects
	 */
	public Collection<Lemma> getLemmas() {
		Set<Lemma> lemmas = new HashSet<Lemma>();
		for (Entry<String, String> entry : lemmasAndCategories.entrySet()) {
			String posTag = entry.getKey();
			String lemma = entry.getValue();
			String tagSetName = posEntry.getTagSetLabel(posTag);
			lemmas.add(new Lemma(lemma, posTag, tagSetName));
		}
		return lemmas;
	}

	/**
	 * Simple utility class to store a single lemma/pos combination together with its tag set
	 * name. Instances are immutable; equality and hash code are based on all three fields.
	 * 
	 * @author Colorado Computational Pharmacology, UC Denver; ccpsupport@ucdenver.edu
	 * 
	 */
	public static class Lemma {
		private final String lemma;
		private final String pos;
		private final String tagSetName;

		/**
		 * @param lemma
		 *            the lemma text
		 * @param pos
		 *            the POS tag the lemma belongs to
		 * @param tagSetName
		 *            the name of the tag set the POS tag comes from
		 */
		public Lemma(String lemma, String pos, String tagSetName) {
			this.lemma = lemma;
			this.pos = pos;
			this.tagSetName = tagSetName;
		}

		/**
		 * @return the lemma
		 */
		public String getLemma() {
			return lemma;
		}

		/**
		 * @return the pos
		 */
		public String getPos() {
			return pos;
		}

		/**
		 * @return the tagSetName
		 */
		public String getTagSetName() {
			return tagSetName;
		}

		/**
		 * Combines the hash codes of lemma, pos and tagSetName (a null field contributes 0).
		 * 
		 * @see java.lang.Object#hashCode()
		 */
		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = prime * result + ((lemma == null) ? 0 : lemma.hashCode());
			result = prime * result + ((pos == null) ? 0 : pos.hashCode());
			result = prime * result + ((tagSetName == null) ? 0 : tagSetName.hashCode());
			return result;
		}

		/**
		 * Two lemmas are equal when they have exactly the same runtime class and their lemma,
		 * pos and tagSetName fields are pairwise equal; a null field only matches null.
		 * 
		 * @see java.lang.Object#equals(java.lang.Object)
		 */
		@Override
		public boolean equals(Object obj) {
			if (this == obj) {
				return true;
			}
			if (obj == null || getClass() != obj.getClass()) {
				return false;
			}
			Lemma other = (Lemma) obj;
			return sameText(lemma, other.lemma) && sameText(pos, other.pos)
					&& sameText(tagSetName, other.tagSetName);
		}

		/** Null-safe string equality: null equals only null. */
		private static boolean sameText(String a, String b) {
			return (a == null) ? (b == null) : a.equals(b);
		}

	}

}