Initial commit
This commit is contained in:
605
venv/lib/python3.7/site-packages/nltk/test/wordnet.doctest
Normal file
605
venv/lib/python3.7/site-packages/nltk/test/wordnet.doctest
Normal file
@@ -0,0 +1,605 @@
|
||||
.. Copyright (C) 2001-2019 NLTK Project
|
||||
.. For license information, see LICENSE.TXT
|
||||
|
||||
=================
|
||||
WordNet Interface
|
||||
=================
|
||||
|
||||
WordNet is just another NLTK corpus reader, and can be imported like this:
|
||||
>>> from __future__ import print_function, unicode_literals
|
||||
>>> from nltk.corpus import wordnet
|
||||
|
||||
For more compact code, we recommend:
|
||||
|
||||
>>> from nltk.corpus import wordnet as wn
|
||||
|
||||
-----
|
||||
Words
|
||||
-----
|
||||
|
||||
Look up a word using ``synsets()``; this function has an optional ``pos`` argument
|
||||
which lets you constrain the part of speech of the word:
|
||||
|
||||
>>> wn.synsets('dog') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
|
||||
[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'),
|
||||
Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]
|
||||
>>> wn.synsets('dog', pos=wn.VERB)
|
||||
[Synset('chase.v.01')]
|
||||
|
||||
The other parts of speech are ``NOUN``, ``ADJ`` and ``ADV``.
|
||||
A synset is identified with a 3-part name of the form: word.pos.nn:
|
||||
|
||||
>>> wn.synset('dog.n.01')
|
||||
Synset('dog.n.01')
|
||||
>>> print(wn.synset('dog.n.01').definition())
|
||||
a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
|
||||
>>> len(wn.synset('dog.n.01').examples())
|
||||
1
|
||||
>>> print(wn.synset('dog.n.01').examples()[0])
|
||||
the dog barked all night
|
||||
>>> wn.synset('dog.n.01').lemmas()
|
||||
[Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
|
||||
>>> [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]
|
||||
['dog', 'domestic_dog', 'Canis_familiaris']
|
||||
>>> wn.lemma('dog.n.01.dog').synset()
|
||||
Synset('dog.n.01')
|
||||
|
||||
The WordNet corpus reader gives access to the Open Multilingual
|
||||
WordNet, using ISO-639 language codes.
|
||||
|
||||
>>> sorted(wn.langs()) # doctest: +NORMALIZE_WHITESPACE
|
||||
['als', 'arb', 'bul', 'cat', 'cmn', 'dan', 'ell', 'eng', 'eus', 'fas',
|
||||
'fin', 'fra', 'glg', 'heb', 'hrv', 'ind', 'ita', 'jpn', 'nld', 'nno',
|
||||
'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm']
|
||||
>>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn')
|
||||
[Synset('dog.n.01'), Synset('spy.n.01')]
|
||||
|
||||
wn.synset('spy.n.01').lemma_names('jpn') # doctest: +NORMALIZE_WHITESPACE
|
||||
['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005',
|
||||
'\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005',
|
||||
'\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1',
|
||||
'\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6']
|
||||
|
||||
>>> wn.synset('dog.n.01').lemma_names('ita')
|
||||
['cane', 'Canis_familiaris']
|
||||
>>> wn.lemmas('cane', lang='ita') # doctest: +NORMALIZE_WHITESPACE
|
||||
[Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'),
|
||||
Lemma('incompetent.n.01.cane')]
|
||||
>>> sorted(wn.synset('dog.n.01').lemmas('dan')) # doctest: +NORMALIZE_WHITESPACE
|
||||
[Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'),
|
||||
Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')]
|
||||
|
||||
sorted(wn.synset('dog.n.01').lemmas('por'))
|
||||
[Lemma('dog.n.01.cachorra'), Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.cadela'), Lemma('dog.n.01.c\xe3o')]
|
||||
|
||||
>>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por')
|
||||
>>> dog_lemma
|
||||
Lemma('dog.n.01.c\xe3o')
|
||||
>>> dog_lemma.lang()
|
||||
'por'
|
||||
>>> len(list(wordnet.all_lemma_names(pos='n', lang='jpn')))
|
||||
64797
|
||||
|
||||
-------
|
||||
Synsets
|
||||
-------
|
||||
|
||||
`Synset`: a set of synonyms that share a common meaning.
|
||||
|
||||
>>> dog = wn.synset('dog.n.01')
|
||||
>>> dog.hypernyms()
|
||||
[Synset('canine.n.02'), Synset('domestic_animal.n.01')]
|
||||
>>> dog.hyponyms() # doctest: +ELLIPSIS
|
||||
[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...]
|
||||
>>> dog.member_holonyms()
|
||||
[Synset('canis.n.01'), Synset('pack.n.06')]
|
||||
>>> dog.root_hypernyms()
|
||||
[Synset('entity.n.01')]
|
||||
>>> wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01'))
|
||||
[Synset('carnivore.n.01')]
|
||||
|
||||
Each synset contains one or more lemmas, which represent a specific
|
||||
sense of a specific word.
|
||||
|
||||
Note that some relations are defined by WordNet only over Lemmas:
|
||||
|
||||
>>> good = wn.synset('good.a.01')
|
||||
>>> good.antonyms()
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
AttributeError: 'Synset' object has no attribute 'antonyms'
|
||||
>>> good.lemmas()[0].antonyms()
|
||||
[Lemma('bad.a.01.bad')]
|
||||
|
||||
The relations that are currently defined in this way are `antonyms`,
|
||||
`derivationally_related_forms` and `pertainyms`.
|
||||
|
||||
If you know the byte offset used to identify a synset in the original
|
||||
Princeton WordNet data file, you can use that to instantiate the synset
|
||||
in NLTK:
|
||||
|
||||
>>> wn.synset_from_pos_and_offset('n', 4543158)
|
||||
Synset('wagon.n.01')
|
||||
|
||||
------
|
||||
Lemmas
|
||||
------
|
||||
|
||||
>>> eat = wn.lemma('eat.v.03.eat')
|
||||
>>> eat
|
||||
Lemma('feed.v.06.eat')
|
||||
>>> print(eat.key())
|
||||
eat%2:34:02::
|
||||
>>> eat.count()
|
||||
4
|
||||
>>> wn.lemma_from_key(eat.key())
|
||||
Lemma('feed.v.06.eat')
|
||||
>>> wn.lemma_from_key(eat.key()).synset()
|
||||
Synset('feed.v.06')
|
||||
>>> wn.lemma_from_key('feebleminded%5:00:00:retarded:00')
|
||||
Lemma('backward.s.03.feebleminded')
|
||||
>>> for lemma in wn.synset('eat.v.03').lemmas():
|
||||
... print(lemma, lemma.count())
|
||||
...
|
||||
Lemma('feed.v.06.feed') 3
|
||||
Lemma('feed.v.06.eat') 4
|
||||
>>> for lemma in wn.lemmas('eat', 'v'):
|
||||
... print(lemma, lemma.count())
|
||||
...
|
||||
Lemma('eat.v.01.eat') 61
|
||||
Lemma('eat.v.02.eat') 13
|
||||
Lemma('feed.v.06.eat') 4
|
||||
Lemma('eat.v.04.eat') 0
|
||||
Lemma('consume.v.05.eat') 0
|
||||
Lemma('corrode.v.01.eat') 0
|
||||
>>> wn.lemma('jump.v.11.jump')
|
||||
Lemma('jump.v.11.jump')
|
||||
|
||||
Lemmas can also have relations between them:
|
||||
|
||||
>>> vocal = wn.lemma('vocal.a.01.vocal')
|
||||
>>> vocal.derivationally_related_forms()
|
||||
[Lemma('vocalize.v.02.vocalize')]
|
||||
>>> vocal.pertainyms()
|
||||
[Lemma('voice.n.02.voice')]
|
||||
>>> vocal.antonyms()
|
||||
[Lemma('instrumental.a.01.instrumental')]
|
||||
|
||||
The three relations above exist only on lemmas, not on synsets.
|
||||
|
||||
-----------
|
||||
Verb Frames
|
||||
-----------
|
||||
|
||||
>>> wn.synset('think.v.01').frame_ids()
|
||||
[5, 9]
|
||||
>>> for lemma in wn.synset('think.v.01').lemmas():
|
||||
... print(lemma, lemma.frame_ids())
|
||||
... print(" | ".join(lemma.frame_strings()))
|
||||
...
|
||||
Lemma('think.v.01.think') [5, 9]
|
||||
Something think something Adjective/Noun | Somebody think somebody
|
||||
Lemma('think.v.01.believe') [5, 9]
|
||||
Something believe something Adjective/Noun | Somebody believe somebody
|
||||
Lemma('think.v.01.consider') [5, 9]
|
||||
Something consider something Adjective/Noun | Somebody consider somebody
|
||||
Lemma('think.v.01.conceive') [5, 9]
|
||||
Something conceive something Adjective/Noun | Somebody conceive somebody
|
||||
>>> wn.synset('stretch.v.02').frame_ids()
|
||||
[8]
|
||||
>>> for lemma in wn.synset('stretch.v.02').lemmas():
|
||||
... print(lemma, lemma.frame_ids())
|
||||
... print(" | ".join(lemma.frame_strings()))
|
||||
...
|
||||
Lemma('stretch.v.02.stretch') [8, 2]
|
||||
Somebody stretch something | Somebody stretch
|
||||
Lemma('stretch.v.02.extend') [8]
|
||||
Somebody extend something
|
||||
|
||||
|
||||
----------
|
||||
Similarity
|
||||
----------
|
||||
|
||||
>>> dog = wn.synset('dog.n.01')
|
||||
>>> cat = wn.synset('cat.n.01')
|
||||
|
||||
>>> hit = wn.synset('hit.v.01')
|
||||
>>> slap = wn.synset('slap.v.01')
|
||||
|
||||
|
||||
``synset1.path_similarity(synset2):``
|
||||
Return a score denoting how similar two word senses are, based on the
|
||||
shortest path that connects the senses in the is-a (hypernym/hyponym)
|
||||
taxonomy. The score is in the range 0 to 1. By default, there is now
|
||||
a fake root node added to verbs, so for cases where previously a path
|
||||
could not be found---and None was returned---it should return a value.
|
||||
The old behavior can be achieved by setting simulate_root to be False.
|
||||
A score of 1 represents identity i.e. comparing a sense with itself
|
||||
will return 1.
|
||||
|
||||
>>> dog.path_similarity(cat) # doctest: +ELLIPSIS
|
||||
0.2...
|
||||
|
||||
>>> hit.path_similarity(slap) # doctest: +ELLIPSIS
|
||||
0.142...
|
||||
|
||||
>>> wn.path_similarity(hit, slap) # doctest: +ELLIPSIS
|
||||
0.142...
|
||||
|
||||
>>> print(hit.path_similarity(slap, simulate_root=False))
|
||||
None
|
||||
|
||||
>>> print(wn.path_similarity(hit, slap, simulate_root=False))
|
||||
None
|
||||
|
||||
``synset1.lch_similarity(synset2):``
|
||||
Leacock-Chodorow Similarity:
|
||||
Return a score denoting how similar two word senses are, based on the
|
||||
shortest path that connects the senses (as above) and the maximum depth
|
||||
of the taxonomy in which the senses occur. The relationship is given
|
||||
as -log(p/2d) where p is the shortest path length and d the taxonomy
|
||||
depth.
|
||||
|
||||
>>> dog.lch_similarity(cat) # doctest: +ELLIPSIS
|
||||
2.028...
|
||||
|
||||
>>> hit.lch_similarity(slap) # doctest: +ELLIPSIS
|
||||
1.312...
|
||||
|
||||
>>> wn.lch_similarity(hit, slap) # doctest: +ELLIPSIS
|
||||
1.312...
|
||||
|
||||
>>> print(hit.lch_similarity(slap, simulate_root=False))
|
||||
None
|
||||
|
||||
>>> print(wn.lch_similarity(hit, slap, simulate_root=False))
|
||||
None
|
||||
|
||||
``synset1.wup_similarity(synset2):``
|
||||
Wu-Palmer Similarity:
|
||||
Return a score denoting how similar two word senses are, based on the
|
||||
depth of the two senses in the taxonomy and that of their Least Common
|
||||
Subsumer (most specific ancestor node). Note that at this time the
|
||||
scores given do _not_ always agree with those given by Pedersen's Perl
|
||||
implementation of WordNet::Similarity.
|
||||
|
||||
The LCS does not necessarily feature in the shortest path connecting the
|
||||
two senses, as it is by definition the common ancestor deepest in the
|
||||
taxonomy, not closest to the two senses. Typically, however, it will so
|
||||
feature. Where multiple candidates for the LCS exist, that whose
|
||||
shortest path to the root node is the longest will be selected. Where
|
||||
the LCS has multiple paths to the root, the longer path is used for
|
||||
the purposes of the calculation.
|
||||
|
||||
>>> dog.wup_similarity(cat) # doctest: +ELLIPSIS
|
||||
0.857...
|
||||
|
||||
>>> hit.wup_similarity(slap)
|
||||
0.25
|
||||
|
||||
>>> wn.wup_similarity(hit, slap)
|
||||
0.25
|
||||
|
||||
>>> print(hit.wup_similarity(slap, simulate_root=False))
|
||||
None
|
||||
|
||||
>>> print(wn.wup_similarity(hit, slap, simulate_root=False))
|
||||
None
|
||||
|
||||
``wordnet_ic``
|
||||
Information Content:
|
||||
Load an information content file from the wordnet_ic corpus.
|
||||
|
||||
>>> from nltk.corpus import wordnet_ic
|
||||
>>> brown_ic = wordnet_ic.ic('ic-brown.dat')
|
||||
>>> semcor_ic = wordnet_ic.ic('ic-semcor.dat')
|
||||
|
||||
Or you can create an information content dictionary from a corpus (or
|
||||
anything that has a words() method).
|
||||
|
||||
>>> from nltk.corpus import genesis
|
||||
>>> genesis_ic = wn.ic(genesis, False, 0.0)
|
||||
|
||||
``synset1.res_similarity(synset2, ic):``
|
||||
Resnik Similarity:
|
||||
Return a score denoting how similar two word senses are, based on the
|
||||
Information Content (IC) of the Least Common Subsumer (most specific
|
||||
ancestor node). Note that for any similarity measure that uses
|
||||
information content, the result is dependent on the corpus used to
|
||||
generate the information content and the specifics of how the
|
||||
information content was created.
|
||||
|
||||
>>> dog.res_similarity(cat, brown_ic) # doctest: +ELLIPSIS
|
||||
7.911...
|
||||
>>> dog.res_similarity(cat, genesis_ic) # doctest: +ELLIPSIS
|
||||
7.204...
|
||||
|
||||
``synset1.jcn_similarity(synset2, ic):``
|
||||
Jiang-Conrath Similarity:
|
||||
Return a score denoting how similar two word senses are, based on the
|
||||
Information Content (IC) of the Least Common Subsumer (most specific
|
||||
ancestor node) and that of the two input Synsets. The relationship is
|
||||
given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)).
|
||||
|
||||
>>> dog.jcn_similarity(cat, brown_ic) # doctest: +ELLIPSIS
|
||||
0.449...
|
||||
>>> dog.jcn_similarity(cat, genesis_ic) # doctest: +ELLIPSIS
|
||||
0.285...
|
||||
|
||||
``synset1.lin_similarity(synset2, ic):``
|
||||
Lin Similarity:
|
||||
Return a score denoting how similar two word senses are, based on the
|
||||
Information Content (IC) of the Least Common Subsumer (most specific
|
||||
ancestor node) and that of the two input Synsets. The relationship is
|
||||
given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).
|
||||
|
||||
>>> dog.lin_similarity(cat, semcor_ic) # doctest: +ELLIPSIS
|
||||
0.886...
|
||||
|
||||
|
||||
---------------------
|
||||
Access to all Synsets
|
||||
---------------------
|
||||
|
||||
Iterate over all the noun synsets:
|
||||
|
||||
>>> for synset in list(wn.all_synsets('n'))[:10]:
|
||||
... print(synset)
|
||||
...
|
||||
Synset('entity.n.01')
|
||||
Synset('physical_entity.n.01')
|
||||
Synset('abstraction.n.06')
|
||||
Synset('thing.n.12')
|
||||
Synset('object.n.01')
|
||||
Synset('whole.n.02')
|
||||
Synset('congener.n.03')
|
||||
Synset('living_thing.n.01')
|
||||
Synset('organism.n.01')
|
||||
Synset('benthos.n.02')
|
||||
|
||||
Get all synsets for this word, possibly restricted by POS:
|
||||
|
||||
>>> wn.synsets('dog') # doctest: +ELLIPSIS
|
||||
[Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), ...]
|
||||
>>> wn.synsets('dog', pos='v')
|
||||
[Synset('chase.v.01')]
|
||||
|
||||
Walk through the noun synsets looking at their hypernyms:
|
||||
|
||||
>>> from itertools import islice
|
||||
>>> for synset in islice(wn.all_synsets('n'), 5):
|
||||
... print(synset, synset.hypernyms())
|
||||
...
|
||||
Synset('entity.n.01') []
|
||||
Synset('physical_entity.n.01') [Synset('entity.n.01')]
|
||||
Synset('abstraction.n.06') [Synset('entity.n.01')]
|
||||
Synset('thing.n.12') [Synset('physical_entity.n.01')]
|
||||
Synset('object.n.01') [Synset('physical_entity.n.01')]
|
||||
|
||||
|
||||
------
|
||||
Morphy
|
||||
------
|
||||
|
||||
Look up forms not in WordNet, with the help of Morphy:
|
||||
|
||||
>>> wn.morphy('denied', wn.NOUN)
|
||||
>>> print(wn.morphy('denied', wn.VERB))
|
||||
deny
|
||||
>>> wn.synsets('denied', wn.NOUN)
|
||||
[]
|
||||
>>> wn.synsets('denied', wn.VERB) # doctest: +NORMALIZE_WHITESPACE
|
||||
[Synset('deny.v.01'), Synset('deny.v.02'), Synset('deny.v.03'), Synset('deny.v.04'),
|
||||
Synset('deny.v.05'), Synset('traverse.v.03'), Synset('deny.v.07')]
|
||||
|
||||
Morphy uses a combination of inflectional ending rules and exception
|
||||
lists to handle a variety of different possibilities:
|
||||
|
||||
>>> print(wn.morphy('dogs'))
|
||||
dog
|
||||
>>> print(wn.morphy('churches'))
|
||||
church
|
||||
>>> print(wn.morphy('aardwolves'))
|
||||
aardwolf
|
||||
>>> print(wn.morphy('abaci'))
|
||||
abacus
|
||||
>>> print(wn.morphy('book', wn.NOUN))
|
||||
book
|
||||
>>> wn.morphy('hardrock', wn.ADV)
|
||||
>>> wn.morphy('book', wn.ADJ)
|
||||
>>> wn.morphy('his', wn.NOUN)
|
||||
>>>
|
||||
|
||||
---------------
|
||||
Synset Closures
|
||||
---------------
|
||||
|
||||
Compute transitive closures of synsets
|
||||
|
||||
>>> dog = wn.synset('dog.n.01')
|
||||
>>> hypo = lambda s: s.hyponyms()
|
||||
>>> hyper = lambda s: s.hypernyms()
|
||||
>>> list(dog.closure(hypo, depth=1)) == dog.hyponyms()
|
||||
True
|
||||
>>> list(dog.closure(hyper, depth=1)) == dog.hypernyms()
|
||||
True
|
||||
>>> list(dog.closure(hypo)) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
|
||||
[Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'),
|
||||
Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'),
|
||||
Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'),
|
||||
Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'),
|
||||
Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...]
|
||||
>>> list(dog.closure(hyper)) # doctest: +NORMALIZE_WHITESPACE
|
||||
[Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'),
|
||||
Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'),
|
||||
Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'),
|
||||
Synset('physical_entity.n.01'), Synset('entity.n.01')]
|
||||
|
||||
|
||||
----------------
|
||||
Regression Tests
|
||||
----------------
|
||||
|
||||
Bug 85: morphy returns the base form of a word, if its input is given
|
||||
as a base form for a POS for which that word is not defined:
|
||||
|
||||
>>> wn.synsets('book', wn.NOUN)
|
||||
[Synset('book.n.01'), Synset('book.n.02'), Synset('record.n.05'), Synset('script.n.01'), Synset('ledger.n.01'), Synset('book.n.06'), Synset('book.n.07'), Synset('koran.n.01'), Synset('bible.n.01'), Synset('book.n.10'), Synset('book.n.11')]
|
||||
>>> wn.synsets('book', wn.ADJ)
|
||||
[]
|
||||
>>> wn.morphy('book', wn.NOUN)
|
||||
'book'
|
||||
>>> wn.morphy('book', wn.ADJ)
|
||||
|
||||
Bug 160: wup_similarity breaks when the two synsets have no common hypernym
|
||||
|
||||
>>> t = wn.synsets('picasso')[0]
|
||||
>>> m = wn.synsets('male')[1]
|
||||
>>> t.wup_similarity(m) # doctest: +ELLIPSIS
|
||||
0.631...
|
||||
|
||||
>>> t = wn.synsets('titan')[1]
|
||||
>>> s = wn.synsets('say', wn.VERB)[0]
|
||||
>>> print(t.wup_similarity(s))
|
||||
None
|
||||
|
||||
Bug 21: "instance of" not included in LCS (very similar to bug 160)
|
||||
|
||||
>>> a = wn.synsets("writings")[0]
|
||||
>>> b = wn.synsets("scripture")[0]
|
||||
>>> brown_ic = wordnet_ic.ic('ic-brown.dat')
|
||||
>>> a.jcn_similarity(b, brown_ic) # doctest: +ELLIPSIS
|
||||
0.175...
|
||||
|
||||
Bug 221: Verb root IC is zero
|
||||
|
||||
>>> from nltk.corpus.reader.wordnet import information_content
|
||||
>>> s = wn.synsets('say', wn.VERB)[0]
|
||||
>>> information_content(s, brown_ic) # doctest: +ELLIPSIS
|
||||
4.623...
|
||||
|
||||
Bug 161: Comparison between WN keys/lemmas should not be case sensitive
|
||||
|
||||
>>> k = wn.synsets("jefferson")[0].lemmas()[0].key()
|
||||
>>> wn.lemma_from_key(k)
|
||||
Lemma('jefferson.n.01.Jefferson')
|
||||
>>> wn.lemma_from_key(k.upper())
|
||||
Lemma('jefferson.n.01.Jefferson')
|
||||
|
||||
Bug 99: WordNet root_hypernyms gives incorrect results
|
||||
|
||||
>>> from nltk.corpus import wordnet as wn
|
||||
>>> for s in wn.all_synsets(wn.NOUN):
|
||||
... if s.root_hypernyms()[0] != wn.synset('entity.n.01'):
|
||||
... print(s, s.root_hypernyms())
|
||||
...
|
||||
>>>
|
||||
|
||||
Bug 382: JCN Division by zero error
|
||||
|
||||
>>> tow = wn.synset('tow.v.01')
|
||||
>>> shlep = wn.synset('shlep.v.02')
|
||||
>>> from nltk.corpus import wordnet_ic
|
||||
>>> brown_ic = wordnet_ic.ic('ic-brown.dat')
|
||||
>>> tow.jcn_similarity(shlep, brown_ic) # doctest: +ELLIPSIS
|
||||
1...e+300
|
||||
|
||||
Bug 428: Depth is zero for instance nouns
|
||||
|
||||
>>> s = wn.synset("lincoln.n.01")
|
||||
>>> s.max_depth() > 0
|
||||
True
|
||||
|
||||
Bug 429: Information content smoothing used old reference to all_synsets
|
||||
|
||||
>>> genesis_ic = wn.ic(genesis, True, 1.0)
|
||||
|
||||
Bug 430: all_synsets used wrong pos lookup when synsets were cached
|
||||
|
||||
>>> for ii in wn.all_synsets(): pass
|
||||
>>> for ii in wn.all_synsets(): pass
|
||||
|
||||
Bug 470: shortest_path_distance ignored instance hypernyms
|
||||
|
||||
>>> google = wordnet.synsets("google")[0]
|
||||
>>> earth = wordnet.synsets("earth")[0]
|
||||
>>> google.wup_similarity(earth) # doctest: +ELLIPSIS
|
||||
0.1...
|
||||
|
||||
Bug 484: similarity metrics returned -1 instead of None for no LCS
|
||||
|
||||
>>> t = wn.synsets('fly', wn.VERB)[0]
|
||||
>>> s = wn.synsets('say', wn.VERB)[0]
|
||||
>>> print(s.shortest_path_distance(t))
|
||||
None
|
||||
>>> print(s.path_similarity(t, simulate_root=False))
|
||||
None
|
||||
>>> print(s.lch_similarity(t, simulate_root=False))
|
||||
None
|
||||
>>> print(s.wup_similarity(t, simulate_root=False))
|
||||
None
|
||||
|
||||
Bug 427: "pants" does not return all the senses it should
|
||||
|
||||
>>> from nltk.corpus import wordnet
|
||||
>>> wordnet.synsets("pants",'n')
|
||||
[Synset('bloomers.n.01'), Synset('pant.n.01'), Synset('trouser.n.01'), Synset('gasp.n.01')]
|
||||
|
||||
Bug 482: Some nouns not being lemmatised by WordNetLemmatizer().lemmatize
|
||||
|
||||
>>> from nltk.stem.wordnet import WordNetLemmatizer
|
||||
>>> WordNetLemmatizer().lemmatize("eggs", pos="n")
|
||||
'egg'
|
||||
>>> WordNetLemmatizer().lemmatize("legs", pos="n")
|
||||
'leg'
|
||||
|
||||
Bug 284: instance hypernyms not used in similarity calculations
|
||||
|
||||
>>> wn.synset('john.n.02').lch_similarity(wn.synset('dog.n.01')) # doctest: +ELLIPSIS
|
||||
1.335...
|
||||
>>> wn.synset('john.n.02').wup_similarity(wn.synset('dog.n.01')) # doctest: +ELLIPSIS
|
||||
0.571...
|
||||
>>> wn.synset('john.n.02').res_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS
|
||||
2.224...
|
||||
>>> wn.synset('john.n.02').jcn_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS
|
||||
0.075...
|
||||
>>> wn.synset('john.n.02').lin_similarity(wn.synset('dog.n.01'), brown_ic) # doctest: +ELLIPSIS
|
||||
0.252...
|
||||
>>> wn.synset('john.n.02').hypernym_paths() # doctest: +ELLIPSIS
|
||||
[[Synset('entity.n.01'), ..., Synset('john.n.02')]]
|
||||
|
||||
Issue 541: add domains to wordnet
|
||||
|
||||
>>> wn.synset('code.n.03').topic_domains()
|
||||
[Synset('computer_science.n.01')]
|
||||
>>> wn.synset('pukka.a.01').region_domains()
|
||||
[Synset('india.n.01')]
|
||||
>>> wn.synset('freaky.a.01').usage_domains()
|
||||
[Synset('slang.n.02')]
|
||||
|
||||
Issue 629: wordnet failures when python run with -O optimizations
|
||||
|
||||
>>> # Run the test suite with python -O to check this
|
||||
>>> wn.synsets("brunch")
|
||||
[Synset('brunch.n.01'), Synset('brunch.v.01')]
|
||||
|
||||
Issue 395: wordnet returns incorrect result for lowest_common_hypernyms of chef and policeman
|
||||
|
||||
>>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
|
||||
[Synset('person.n.01')]
|
||||
|
||||
Bug https://github.com/nltk/nltk/issues/1641: Non-English lemmas containing capital letters cannot be looked up using wordnet.lemmas() or wordnet.synsets()
|
||||
|
||||
>>> wn.lemmas('Londres', lang='fra')
|
||||
[Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
|
||||
>>> wn.lemmas('londres', lang='fra')
|
||||
[Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
|
||||
|
||||
Patch-1 https://github.com/nltk/nltk/pull/2065 Adding 3 functions (relations) to WordNet class
|
||||
>>> wn.synsets("computer_science")[0].in_topic_domains()[2]
|
||||
Synset('access_time.n.01')
|
||||
>>> wn.synsets("France")[0].in_region_domains()[18]
|
||||
Synset('french.n.01')
|
||||
>>> wn.synsets("slang")[1].in_usage_domains()[18]
|
||||
Synset('can-do.s.01')
|
||||
Reference in New Issue
Block a user