The first step is to download and install Tanl.

Running the Demo

> python
>>> from tanl.split.SentenceSplitter import *
>>> from tanl.split.Tokenizer        import *
>>> from tanl.tag.TreeTagger         import *
>>>
>>> splitter = SentenceSplitter('IT-Model.pickle').pipe(open('input.xml'))
>>> tokenizer = Tokenizer().pipe(splitter)
>>> tagger = PosTagger('fullexMorph.tanl').pipe(tokenizer)
>>>
>>> for sentence in tagger:
...     for token in sentence:
...         print token
...
<token FORM="L'" POS="RDns" LEMMA="il" />
<token FORM="armonium" POS="Sms" LEMMA="armonium" />
<token FORM="(" POS="FB" LEMMA="(" />
<token FORM="in" POS="E" LEMMA="in" />
<token FORM="francese" POS="Sns" LEMMA="francese" />
<token FORM="," POS="FF" LEMMA="," />
<token FORM=""" POS="FB" LEMMA=""" />
<token FORM="harmonium" POS="Smn" LEMMA="harmonium" />
<token FORM=""" POS="FB" LEMMA=""" />
<token FORM=")" POS="FB" LEMMA=")" />
<token FORM="è" POS="VAip3s" LEMMA="essere" />
<token FORM="uno" POS="RIms" LEMMA="uno" />
<token FORM="strumento" POS="Sms" LEMMA="strumento" />
<token FORM="musicale" POS="Ans" LEMMA="musicale" />
<token FORM="azionato" POS="Vpsms" LEMMA="azionare" />
<token FORM="con" POS="E" LEMMA="con" />
<token FORM="una" POS="RIfs" LEMMA="una" />
<token FORM="tastiera" POS="Sfs" LEMMA="tastiera" />
<token FORM="," POS="FF" LEMMA="," />
<token FORM="detta" POS="Vip3s" LEMMA="dettare" />
<token FORM="manuale" POS="Ans" LEMMA="manuale" />
<token FORM="." POS="FS" LEMMA="." />
>>> _

Online Documentation