Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 665 #687

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
* Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
/**
*
*/

package edu.illinois.cs.cogcomp.core.datastructures.textannotation;

import java.util.ArrayList;
Expand Down Expand Up @@ -60,6 +58,14 @@ public SpanLabelView(String viewName, String viewGenerator, TextAnnotation text,

@Override
public void addConstituent(Constituent constituent) {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

small improvement: please move start/end assignments inside if{} block, as they aren't being used otherwise.

if (!allowOverlappingSpans) {
int start = constituent.getStartSpan();
int end = constituent.getEndSpan();
if (this.getConstituentsCoveringSpan(start, end).size() != 0)
throw new IllegalArgumentException("Span [" + start + ", " + end + "] already labeled.");
}

super.addConstituent(constituent);

// this sort is grossly inefficient when appending contiguous tokens one at a time.
Expand Down Expand Up @@ -95,9 +101,6 @@ public Constituent addSpanLabel(int start, int end, String label, double score)
new Constituent(label, score, this.getViewName(), this.getTextAnnotation(), start,
end);

if (!allowOverlappingSpans && this.getConstituentsCoveringSpan(start, end).size() != 0)
throw new IllegalArgumentException("Span [" + start + ", " + end + "] already labeled.");

this.addConstituent(c);

return c;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package edu.illinois.cs.cogcomp.core.datastructures.textannotation;

import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder;
import edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder;
import edu.illinois.cs.cogcomp.core.datastructures.IntPair;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import edu.illinois.cs.cogcomp.nlp.tokenizer.Tokenizer;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

/**
 * Tests for {@link SpanLabelView#addConstituent(Constituent)}.
 *
 * <p>Two views are built over the same text: one created with overlapping spans allowed and one
 * with them disallowed. The tests check that the permissive view stores both overlapping
 * constituents unchanged, and that the strict view rejects the second, overlapping constituent
 * with an {@link IllegalArgumentException}.
 */
public class SpanLabelViewTest {
    SpanLabelView overlappingSpansView;
    SpanLabelView noOverlappingSpansView;
    TextAnnotation ta;
    Constituent baseConstituent;
    Constituent overlappingConstituent;

    String viewName = "VIEWNAME";
    String viewGenerator = "VIEW-GENERATOR";
    String text = "This is a test string; do not pay it any mind.";
    String corpusId = "TEST";
    String textId = "ID";

    double score = 42.0;
    // Token spans: [0, 5) and [2, 6) share tokens 2..4, so the two constituents overlap.
    int baseStart = 0;
    int baseEnd = 5;
    int overStart = 2;
    int overEnd = 6;

    /**
     * Builds a whitespace tokenization of {@code text} treated as a single sentence.
     *
     * <p>Tokens and their character offsets are produced by the same scan, so the two arrays
     * always have the same length and each offset pair delimits exactly its token, even when the
     * text contains repeated, leading or trailing whitespace.
     *
     * @param text the raw text to tokenize
     * @return the tokens, their character offsets, and a single sentence boundary placed after
     *         the last token
     */
    private Tokenizer.Tokenization getTokenization(String text) {
        List<String> tokens = new ArrayList<>();
        List<IntPair> characterOffsets = new ArrayList<>();

        // -1 means "not currently inside a token".
        int tokenStart = -1;
        // Iterate one position past the end so the final token is flushed by the same branch.
        for (int i = 0; i <= text.length(); i++) {
            boolean atBoundary = i == text.length() || Character.isWhitespace(text.charAt(i));
            if (!atBoundary) {
                if (tokenStart < 0) {
                    tokenStart = i;
                }
            } else if (tokenStart >= 0) {
                tokens.add(text.substring(tokenStart, i));
                characterOffsets.add(new IntPair(tokenStart, i));
                tokenStart = -1;
            }
        }

        int[] sentenceEndArray = {tokens.size()};
        return new Tokenizer.Tokenization(tokens.toArray(new String[0]),
                characterOffsets.toArray(new IntPair[0]), sentenceEndArray);
    }

    /**
     * Creates a fresh text annotation, one view of each overlap policy, and the two overlapping
     * constituents before every test.
     */
    @Before
    public void init() {
        TextAnnotationBuilder taBuilder = new BasicTextAnnotationBuilder();
        ta = taBuilder.createTextAnnotation(this.corpusId, this.textId, this.text,
                getTokenization(this.text));

        boolean allowOverlappingSpans = true;
        overlappingSpansView = new SpanLabelView(this.viewName, this.viewGenerator,
                ta, this.score, allowOverlappingSpans);
        allowOverlappingSpans = false;
        noOverlappingSpansView = new SpanLabelView(this.viewName, this.viewGenerator,
                ta, this.score, allowOverlappingSpans);

        baseConstituent =
                new Constituent("BASE", this.score, this.viewName, ta, baseStart, baseEnd);
        overlappingConstituent =
                new Constituent("OVER", this.score, this.viewName, ta, overStart, overEnd);
    }

    /**
     * A view that allows overlapping spans must accept both constituents and keep their spans
     * intact.
     */
    @Test
    public void testOverlappingSpans() {
        overlappingSpansView.addConstituent(baseConstituent);
        overlappingSpansView.addConstituent(overlappingConstituent);

        // JUnit assertions are used instead of the `assert` keyword, which is skipped unless the
        // JVM is started with -ea.
        int count = 0;
        for (Constituent c : overlappingSpansView.getConstituents()) {
            count++;
            if (c.getLabel().equals("BASE")) {
                assertEquals(this.baseStart, c.getStartSpan());
                assertEquals(this.baseEnd, c.getEndSpan());
            } else {
                assertEquals("OVER", c.getLabel());
                assertEquals(this.overStart, c.getStartSpan());
                assertEquals(this.overEnd, c.getEndSpan());
            }
        }
        // Guards against the loop passing vacuously on an empty or partially filled view.
        assertEquals(2, count);
    }

    /**
     * A view that disallows overlapping spans must accept the first constituent and reject the
     * second, overlapping one.
     */
    @Test
    public void testNoOverlappingSpans() {
        // Outside the try block on purpose: an exception here is a test error, not a pass.
        noOverlappingSpansView.addConstituent(baseConstituent);

        try {
            noOverlappingSpansView.addConstituent(overlappingConstituent);
            fail("Expected IllegalArgumentException when adding an overlapping constituent");
        } catch (IllegalArgumentException expected) {
            // Expected: the overlapping span was rejected.
        }
    }
}