Using context (6.1.5)

This example uses the previous word, as well as the word's suffixes, as features.

>>> train_set, test_set = featuresets[size:], featuresets[:size]  # NOTE(review): featuresets and size are only assigned further down in this transcript; presumably carried over from an earlier session -- confirm
>>> classifier = nltk.DecisionTreeClassifier.train(train_set)  # classifier is rebound to a NaiveBayesClassifier later in this session
>>> 
>>> def pos_features(sentence, i):  # build the feature dict for the item at index i of sentence
...     features = {"suffix(1)": sentence[i][-1:],  # trailing 1-, 2- and 3-character slices of sentence[i]
...                 "suffix(2)": sentence[i][-2:],
...                 "suffix(3)": sentence[i][-3:]}
...     if i == 0:  # the first item has no predecessor
...             features["prev-word"] = "<START>"  # placeholder used in place of a previous word
...     else:
...             features["prev-word"] = sentence[i-1]  # context feature: the item just before index i
...     return features
... 
>>> pos_features(brown.sents()[0],8)
{'suffix(3)': 'ion', 'prev-word': 'an', 'suffix(2)': 'on', 'suffix(1)': 'n'}
>>> tagged_sents = brown.tagged_sents(categories='news')
>>> featuresets = []  # collects (feature dict, tag) pairs
>>> for tagged_sent in tagged_sents:
...     untagged_sent = nltk.tag.untag(tagged_sent)  # pos_features is given this untagged form, not the (word, tag) pairs
...     for i, (word, tag) in enumerate(tagged_sent):
...             featuresets.append((pos_features(untagged_sent, i), tag))
... 
>>> size = int(len(featuresets) * 0.1)  # one tenth of the feature sets, truncated to an int
>>> train_set, test_set = featuresets[size:], featuresets[:size]  # first `size` entries are held out for testing; the rest are used for training
>>> classifier = nltk.NaiveBayesClassifier.train(train_set)
>>> nltk.classify.accuracy(classifier, test_set)
0.7891596220785678
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s