public final class AnalyzedTokenReadings extends Object implements Iterable<AnalyzedToken>
AnalyzedTokens used to store multiple POS tags and lemmas
for a given single token.
Constructor and Description |
---|
AnalyzedTokenReadings(AnalyzedToken token) |
AnalyzedTokenReadings(AnalyzedToken[] tokens,
int startPos) |
AnalyzedTokenReadings(AnalyzedToken token,
int startPos) |
AnalyzedTokenReadings(AnalyzedTokenReadings oldAtr,
List<AnalyzedToken> newReadings,
String ruleApplied) |
AnalyzedTokenReadings(List<AnalyzedToken> tokens,
int startPos) |
Modifier and Type | Method and Description |
---|---|
void |
addReading(AnalyzedToken token,
String ruleApplied)
Add a new reading.
|
boolean |
equals(Object obj) |
AnalyzedToken |
getAnalyzedToken(int idx)
Get a token reading.
|
List<ChunkTag> |
getChunkTags() |
String |
getCleanToken() |
int |
getEndPos() |
String |
getHistoricalAnnotations()
Used to track disambiguator actions.
|
int |
getImmunizationSourceLine() |
int |
getPosFix() |
List<AnalyzedToken> |
getReadings() |
int |
getReadingsLength()
Number of readings.
|
int |
getStartPos() |
String |
getToken() |
String |
getWhitespaceBefore() |
boolean |
hasAnyLemma(String... lemmas)
Checks if one of the token's readings has one of the given lemmas
|
boolean |
hasAnyPartialPosTag(String... posTags)
Checks if the token has any of the given particular POS tags (only a part of the given POS tag needs to match)
|
int |
hashCode() |
boolean |
hasLemma(String lemma)
Checks if one of the token's readings has a particular lemma.
|
boolean |
hasPartialPosTag(String posTag)
Checks if the token has a particular POS tag, where only a part of the given POS tag needs to match.
|
boolean |
hasPosTag(String posTag)
Checks if the token has a particular POS tag.
|
boolean |
hasPosTagAndLemma(String posTag,
String lemma)
Checks if the token has a particular POS tag and lemma.
|
boolean |
hasPosTagStartingWith(String posTag)
Checks if the token has a POS tag starting with the given string.
|
boolean |
hasReading()
Checks if there is at least one POS tag
|
boolean |
hasSameLemmas()
Used to optimize pattern matching.
|
boolean |
hasTypographicApostrophe() |
void |
ignoreSpelling()
Make the token ignored by all spelling rules.
|
void |
immunize(int sourceLine) |
boolean |
isFieldCode() |
boolean |
isIgnoredBySpeller()
Test if the token can be ignored by spelling rules.
|
boolean |
isImmunized() |
boolean |
isLinebreak()
Returns true if the token equals \n, \r, \n\r, or \r\n.
|
boolean |
isNonWord() |
boolean |
isParagraphEnd() |
boolean |
isPosTagUnknown()
Test if the token's POS tag equals null.
|
boolean |
isSentenceEnd() |
boolean |
isSentenceStart() |
boolean |
isTagged() |
boolean |
isWhitespace() |
boolean |
isWhitespaceBefore() |
Iterator<AnalyzedToken> |
iterator() |
void |
leaveReading(AnalyzedToken token)
Removes all readings but the one that matches the token given.
|
boolean |
matchesChunkRegex(String chunkRegex) |
boolean |
matchesPosTagRegex(String posTagRegex)
Checks if at least one of the readings matches a given POS tag regex.
|
AnalyzedToken |
readingWithLemma(String lemma)
Returns the first reading that matches a given lemma.
|
AnalyzedToken |
readingWithTagRegex(String posTagRegex)
Returns the first reading that matches a given POS tag regex.
|
void |
removeReading(AnalyzedToken token,
String ruleApplied)
Removes a reading from the list of readings.
|
void |
setChunkTags(List<ChunkTag> chunkTags) |
void |
setCleanToken(String cleanToken) |
void |
setParagraphEnd()
Add a reading with a paragraph end token unless this is already a paragraph end.
|
void |
setPosFix(int fix) |
void |
setSentEnd()
Add a SENT_END tag.
|
void |
setStartPos(int position) |
void |
setTypographicApostrophe() |
void |
setWhitespaceBefore(String prevToken) |
String |
toString() |
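Methods inherited from class java.lang.Object: clone, finalize, getClass, notify, notifyAll, wait, wait, wait
Methods inherited from interface java.lang.Iterable: forEach, spliterator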
public AnalyzedTokenReadings(AnalyzedToken[] tokens, int startPos)
public AnalyzedTokenReadings(AnalyzedToken token, int startPos)
public AnalyzedTokenReadings(List<AnalyzedToken> tokens, int startPos)
public AnalyzedTokenReadings(AnalyzedTokenReadings oldAtr, List<AnalyzedToken> newReadings, String ruleApplied)
public AnalyzedTokenReadings(AnalyzedToken token)
public List<AnalyzedToken> getReadings()
public AnalyzedToken getAnalyzedToken(int idx)
public boolean hasPosTag(String posTag)
posTag
- POS tag to look for
public boolean hasPosTagAndLemma(String posTag, String lemma)
posTag
- POS tag and lemma to look for
public boolean hasReading()
public boolean hasLemma(String lemma)
lemma
- lemma to look for
public boolean hasAnyLemma(String... lemmas)
lemmas
- lemmas to look for
public boolean hasPartialPosTag(String posTag)
posTag
- POS tag substring to look for
public boolean hasAnyPartialPosTag(String... posTags)
posTags
- POS tag substrings to look for
public boolean hasPosTagStartingWith(String posTag)
posTag
- POS tag substring to look for
public boolean matchesPosTagRegex(String posTagRegex)
posTagRegex
- POS tag regular expression to look for
public boolean matchesChunkRegex(String chunkRegex)
public AnalyzedToken readingWithTagRegex(String posTagRegex)
posTagRegex
- POS tag regular expression to look for
public AnalyzedToken readingWithLemma(String lemma)
public void addReading(AnalyzedToken token, String ruleApplied)
token
- new reading, given as AnalyzedToken
public void removeReading(AnalyzedToken token, String ruleApplied)
token
- reading to be removed
public void leaveReading(AnalyzedToken token)
token
- Token to be matched
public int getReadingsLength()
public boolean isWhitespace()
public boolean isLinebreak()
Returns true if the token equals \n, \r, \n\r, or \r\n.
public boolean isSentenceStart()
public boolean isParagraphEnd()
public void setParagraphEnd()
public boolean isSentenceEnd()
public boolean isFieldCode()
public void setSentEnd()
public int getStartPos()
public int getEndPos()
public void setStartPos(int position)
public void setPosFix(int fix)
public int getPosFix()
public String getToken()
public void setWhitespaceBefore(String prevToken)
public String getWhitespaceBefore()
public boolean isWhitespaceBefore()
public void immunize(int sourceLine)
public boolean isImmunized()
public int getImmunizationSourceLine()
public void ignoreSpelling()
public boolean isIgnoredBySpeller()
public boolean isPosTagUnknown()
public String getHistoricalAnnotations()
public boolean isTagged()
public boolean hasSameLemmas()
Returns true if all AnalyzedToken lemmas are the same.
public boolean isNonWord()
@NotNull public Iterator<AnalyzedToken> iterator()
Specified by: iterator in interface Iterable<AnalyzedToken>
@Experimental public void setCleanToken(String cleanToken)
@Experimental public String getCleanToken()
public void setTypographicApostrophe()
public boolean hasTypographicApostrophe()