3.4.4. Finishing the Implementation

Now that we have initialized our WS implementation, all that remains is some standard Java coding. As this should not require further explanation, we simply show the finished source code of our new WS implementation here:

package com.example;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import javax.annotation.Resource;
import javax.jws.WebMethod;
import javax.jws.WebParam;
import javax.jws.WebService;
import javax.jws.soap.SOAPBinding;
import javax.xml.bind.JAXBElement;
import javax.xml.ws.Holder;
import javax.xml.ws.WebServiceContext;

import net.sf.qallme.WebServiceTools;
import net.sf.qallme.gen.ws.AnnotatedSentence;
import net.sf.qallme.gen.ws.InternalServiceFault;
import net.sf.qallme.gen.ws.AnnotatedSentence.Annotation;
import net.sf.qallme.gen.ws.termannotation.TermAnnotator;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.namefind.DictionaryNameFinder;
import opennlp.tools.namefind.TokenNameFinder;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.Span;
import de.dfki.qallme.Util;

/**
 * An implementation of the {@link TermAnnotator} WS interface which is based on
 * term dictionaries.
 * <p>
 * The term dictionary is read lazily on the first call of
 * {@link #annotateTerms(Holder)} from the web application resource
 * {@link #TERMS_DICT_FILE_PATH}.
 */
@WebService(name = "TermAnnotator",
		serviceName = "TermAnnotatorWS",
		portName = "TermAnnotatorPort",
		targetNamespace = "http://qallme.sf.net/wsdl/termannotation.wsdl")
@SOAPBinding(parameterStyle = SOAPBinding.ParameterStyle.BARE)
public class DictionaryTermAnnotatorWSProvider implements TermAnnotator {

	/** path to the terms dictionary file */
	private static final String TERMS_DICT_FILE_PATH = "/res/terms.list";

	/** encoding in which the terms dictionary file is read */
	private static final String TERMS_DICT_ENCODING = "UTF-8";

	/**
	 * the type assigned to every created term annotation (looks like a
	 * placeholder value that is meant to be replaced by a real term type)
	 */
	private static final String TERM_TYPE = "YOUR_TERM_TYPE";

	/** the web service context is injected by the application server */
	@Resource
	private WebServiceContext wsContext = null;

	/** the finder to use for identifying terms in sentences */
	private TokenNameFinder termFinder = null;

	/**
	 * Annotates English terms in the given sentence.
	 * <p>
	 * Annotations which already exist in the input are replaced by
	 * placeholders before the term search and are restored afterwards. The
	 * holder's value is replaced by the newly created sentence.
	 * 
	 * @param annotatedSentence
	 *            the sentence to annotate with terms
	 * @throws InternalServiceFault
	 *             in case there is any internal problem from which the WS
	 *             cannot recover
	 * @see TermAnnotator#annotateTerms(Holder)
	 */
	@Override
	@WebMethod
	public void annotateTerms(
			@WebParam(name = "AnnotatedSentence",
					targetNamespace = "http://qallme.sf.net/xsd/qallmeshared.xsd",
					mode = WebParam.Mode.INOUT,
					partName = "annotatedSentence")
					Holder<AnnotatedSentence> annotatedSentence)
			throws InternalServiceFault {
		// initialize once
		initialize();

		// save all existing annotations in the input
		List<JAXBElement<Annotation>> replacedAnnotations = new ArrayList<JAXBElement<Annotation>>();
		String input = Util.replaceAnnotationsWithPlaceholders(
				annotatedSentence.value, replacedAnnotations).toString();

		// find all terms and add them as annotations to the result sentence
		AnnotatedSentence result = Util.createAnnotatedSentence(null);
		String[] tokenization = WhitespaceTokenizer.INSTANCE.tokenize(input);
		assembleSentenceContent(result.getContent(), tokenization,
				this.termFinder.find(tokenization));

		// restore all saved annotations and return the new annotated sentence
		annotatedSentence.value = Util.replacePlaceholdersWithAnnotations(
				result, replacedAnnotations);
	}

	/**
	 * Assembles the given content list of a new {@link AnnotatedSentence} using
	 * the provided sentence tokens and the provided terms to annotate.
	 * <p>
	 * Tokens are joined by single whitespace characters. A separating
	 * whitespace is emitted both before and after each annotation (unless the
	 * annotation is at the very start or end of the sentence), so that the
	 * concatenation of all content pieces yields the tokens joined by single
	 * spaces.
	 * 
	 * @param sentContent
	 *            the content list to assemble
	 * @param tokens
	 *            the raw sentence tokens to use in the new sentence
	 * @param terms
	 *            the terms (or rather their positions in the given
	 *            tokenization) to use as annotations in the new sentence;
	 *            they are processed in the given order
	 */
	private static void assembleSentenceContent(List<Serializable> sentContent,
			String[] tokens, Span[] terms) {
		int prevTermEnd = 0;
		for (Span span : terms) {
			// a previous term exists as soon as prevTermEnd has been advanced
			boolean followsTerm = prevTermEnd > 0;
			if (prevTermEnd != span.getStart()) {
				sentContent.add(createText(tokens, prevTermEnd,
						span.getStart(), followsTerm, true));
			} else if (followsTerm) {
				// two directly adjacent terms: keep the separating whitespace
				sentContent.add(" ");
			}
			sentContent.add(Util.createAnnotation(createText(tokens,
					span.getStart(), span.getEnd(), false, false),
					TERM_TYPE, null));
			prevTermEnd = span.getEnd();
		}
		if (prevTermEnd != tokens.length)
			sentContent.add(createText(tokens, prevTermEnd, tokens.length,
					prevTermEnd > 0, false));
	}

	/**
	 * Creates a single piece of text using the tokens from the given array
	 * slice. A single whitespace character is joined between all tokens in the
	 * result. An empty slice ({@code start >= end}) contributes no tokens.
	 * 
	 * @param tokens
	 *            the tokens from which to take a slice to create text
	 * @param start
	 *            the index of the first token from the array to include
	 * @param end
	 *            the index of the first token of the slice to not include
	 *            anymore
	 * @param addInitialSpace
	 *            whether to add an initial whitespace character to the
	 *            string ({@code true}) or not ({@code false})
	 * @param addFinalSpace
	 *            whether to add a final whitespace character to the string (
	 *            {@code true}) or not ({@code false})
	 * @return the created text string
	 */
	private static String createText(String[] tokens, int start, int end,
			boolean addInitialSpace, boolean addFinalSpace) {
		StringBuilder result = new StringBuilder();
		if (addInitialSpace)
			result.append(' ');
		for (int i = start; i < end; i++) {
			if (i > start)
				result.append(' ');
			result.append(tokens[i]);
		}
		if (addFinalSpace)
			result.append(' ');
		return result.toString();
	}

	/**
	 * Initializes this WS implementation, if necessary. The term finder is
	 * created only if it has not been created by an earlier call.
	 * 
	 * @throws InternalServiceFault
	 *             in case there is any internal problem from which the WS
	 *             cannot recover
	 */
	private synchronized void initialize() throws InternalServiceFault {
		if (this.termFinder != null)
			return;
		this.termFinder = new DictionaryNameFinder(readTermDictionary());
	}

	/**
	 * Reads the terms dictionary which is specified by
	 * {@link #TERMS_DICT_FILE_PATH} and returns it. The file has to contain one
	 * term per line where the tokens of each term are separated by whitespaces.
	 * The underlying stream is closed before this method returns.
	 * 
	 * @return the read terms dictionary
	 * @throws InternalServiceFault
	 *             in case the dictionary resource cannot be found or there is
	 *             some IO problem from which the WS cannot recover
	 */
	private Dictionary readTermDictionary() throws InternalServiceFault {
		InputStream dictStream = null;
		try {
			dictStream = WebServiceTools.getServletContext(this.wsContext)
					.getResourceAsStream(TERMS_DICT_FILE_PATH);
			// report a missing resource as a proper service fault instead of
			// running into a NullPointerException in the reader below
			if (dictStream == null)
				throw new IOException("The resource '" + TERMS_DICT_FILE_PATH
						+ "' could not be found.");
			return Dictionary.parseOneEntryPerLine(new InputStreamReader(
					dictStream, TERMS_DICT_ENCODING));
		} catch (UnsupportedEncodingException e) {
			// should never happen
			throw new InternalServiceFault(
					"Could not read the term dictionary.", e.getMessage(), e);
		} catch (IOException e) {
			throw new InternalServiceFault(
					"Could not read the term dictionary.", e.getMessage(), e);
		} finally {
			if (dictStream != null) {
				try {
					dictStream.close();
				} catch (IOException ignored) {
					// the stream is no longer needed at this point and a
					// failure to close it must not mask the actual outcome
				}
			}
		}
	}

}