Update changelog for 4.0.0 release #2981

Merged · 23 commits · Oct 28, 2020
785 changes: 436 additions & 349 deletions CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion gensim/models/callbacks.py
@@ -61,7 +61,7 @@
...
>>>
>>> epoch_logger = EpochLogger()
>>> w2v_model = Word2Vec(common_texts, iter=5, size=10, min_count=0, seed=42, callbacks=[epoch_logger])
>>> w2v_model = Word2Vec(common_texts, epochs=5, vector_size=10, min_count=0, seed=42, callbacks=[epoch_logger])
Epoch #0 start
Epoch #0 end
Epoch #1 start
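For context (editorial note, not part of this diff): the `EpochLogger` used in the doctest above is a callback defined earlier in the same docstring. A minimal sketch of such a callback, assuming gensim's `CallbackAny2Vec` base class from `gensim.models.callbacks`, looks roughly like this:

.. sourcecode:: pycon

    >>> from gensim.models.callbacks import CallbackAny2Vec
    >>>
    >>> class EpochLogger(CallbackAny2Vec):
    ...     '''Callback to log information about training (sketch only).'''
    ...     def __init__(self):
    ...         self.epoch = 0
    ...     def on_epoch_begin(self, model):
    ...         print("Epoch #{} start".format(self.epoch))
    ...     def on_epoch_end(self, model):
    ...         print("Epoch #{} end".format(self.epoch))
    ...         self.epoch += 1
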
2 changes: 1 addition & 1 deletion gensim/models/doc2vec.py
@@ -751,7 +751,7 @@ def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='*
@deprecated(
"Gensim 4.0.0 implemented internal optimizations that make calls to init_sims() unnecessary. "
"init_sims() is now obsoleted and will be completely removed in future versions. "
"See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
"See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4"
)
def init_sims(self, replace=False):
"""
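Background on the deprecation above (editorial note, not part of this diff): in Gensim 4.x a unit-normalized vector can be requested on demand, so precomputing norms with `init_sims()` is unnecessary. A minimal sketch using the KeyedVectors API (`get_vector` with `norm=True`):

.. sourcecode:: pycon

    >>> # instead of calling model.init_sims(), ask for a normalized vector directly
    >>> normed = model.wv.get_vector('computer', norm=True)
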
51 changes: 21 additions & 30 deletions gensim/models/fasttext.py
@@ -30,7 +30,7 @@

.. sourcecode:: pycon

>>> # from gensim.models import FastText # FIXME: why does Sphinx dislike this import?
>>> from gensim.models import FastText
>>> from gensim.test.utils import common_texts # some example sentences
>>>
>>> print(common_texts[0])
@@ -50,16 +50,7 @@

.. sourcecode:: pycon

>>> model2 = FastText(vector_size=4, window=3, min_count=1, sentences=common_texts, iter=10)

.. Important::
This style of initialize-and-train in a single line is **deprecated**. We include it here
for backward compatibility only.

Please use the initialize-`build_vocab`-`train` pattern above instead, including using `epochs`
instead of `iter`.
The motivation is to simplify the API and resolve naming inconsistencies,
e.g. the iter parameter to the constructor is called epochs in the train function.
>>> model2 = FastText(vector_size=4, window=3, min_count=1, sentences=common_texts, epochs=10)

The two models above are instantiated differently, but behave identically.
For example, we can compare the embeddings they've calculated for the word "computer":
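The comparison itself lies outside the visible hunk; a minimal sketch of such a check, assuming both models above have finished training, might be:

.. sourcecode:: pycon

    >>> import numpy as np
    >>>
    >>> np.allclose(model.wv['computer'], model2.wv['computer'], atol=1e-4)  # sketch, not part of this diff
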
@@ -139,7 +130,7 @@

>>> import numpy as np
>>>
>>> 'computation' in model.wv.vocab # New word, currently out of vocab
>>> 'computation' in model.wv.key_to_index # New word, currently out of vocab
False
>>> old_vector = np.copy(model.wv['computation']) # Grab the existing vector
>>> new_sentences = [
@@ -157,7 +148,7 @@
>>> new_vector = model.wv['computation']
>>> np.allclose(old_vector, new_vector, atol=1e-4) # Vector has changed, model has learnt something
False
>>> 'computation' in model.wv.vocab # Word is still out of vocab
>>> 'computation' in model.wv.key_to_index # Word is still out of vocab
False

.. Important::
@@ -178,15 +169,15 @@

.. sourcecode:: pycon

>>> 'computer' in fb_model.wv.vocab # New word, currently out of vocab
>>> 'computer' in fb_model.wv.key_to_index # New word, currently out of vocab
False
>>> old_computer = np.copy(fb_model.wv['computer']) # Calculate current vectors
>>> fb_model.build_vocab(new_sentences, update=True)
>>> fb_model.train(new_sentences, total_examples=len(new_sentences), epochs=model.epochs)
>>> new_computer = fb_model.wv['computer']
>>> np.allclose(old_computer, new_computer, atol=1e-4) # Vector has changed, model has learnt something
False
>>> 'computer' in fb_model.wv.vocab # New word is now in the vocabulary
>>> 'computer' in fb_model.wv.key_to_index # New word is now in the vocabulary
True

If you do not intend to continue training the model, consider using the
@@ -200,25 +191,25 @@
>>> cap_path = datapath("crime-and-punishment.bin")
>>> wv = load_facebook_vectors(cap_path)
>>>
>>> 'landlord' in wv.vocab # Word is out of vocabulary
>>> 'landlord' in wv.key_to_index # Word is out of vocabulary
False
>>> oov_vector = wv['landlord']
>>> oov_vector = wv['landlord'] # Even OOV words have vectors in FastText
>>>
>>> 'landlady' in wv.vocab # Word is in the vocabulary
>>> 'landlady' in wv.key_to_index # Word is in the vocabulary
True
>>> iv_vector = wv['landlady']

Retrieve word-vector for vocab and out-of-vocab word:
Retrieve the word-vector for vocab and out-of-vocab word:

.. sourcecode:: pycon

>>> existent_word = "computer"
>>> existent_word in model.wv.vocab
>>> existent_word in model.wv.key_to_index
True
>>> computer_vec = model.wv[existent_word] # numpy vector of a word
>>>
>>> oov_word = "graph-out-of-vocab"
>>> oov_word in model.wv.vocab
>>> oov_word in model.wv.key_to_index
False
>>> oov_vec = model.wv[oov_word] # numpy vector for OOV word

@@ -488,9 +479,9 @@ def estimate_memory(self, vocab_size=None, report=None):
hashes = ft_ngram_hashes(word, self.wv.min_n, self.wv.max_n, self.wv.bucket)
num_ngrams += len(hashes)
# A list (64 bytes) with one np.array (100 bytes) per key, with a total of
# num_ngrams uint32s (4 bytes) amongst them
# Only used during training, not stored with the model
report['buckets_word'] = 64 + (100 * len(self.wv)) + (4 * num_ngrams) # FIXME: caching & calc sensible?
# num_ngrams uint32s (4 bytes) amongst them.
# Only used during training, not stored with the model.
report['buckets_word'] = 64 + (100 * len(self.wv)) + (4 * num_ngrams) # TODO: caching & calc sensible?
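# Editorial worked example (hypothetical figures, not from this diff): with a 10,000-word
# vocabulary averaging 15 char-ngrams per word, this estimate comes to
# 64 + 100 * 10,000 + 4 * 150,000 = 1,600,064 bytes, i.e. roughly 1.5 MiB.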
report['total'] = sum(report.values())
logger.info(
"estimated required memory for %i words, %i buckets and %i dimensions: %i bytes",
@@ -541,7 +532,7 @@ def _do_train_job(self, sentences, alpha, inits):
@deprecated(
"Gensim 4.0.0 implemented internal optimizations that make calls to init_sims() unnecessary. "
"init_sims() is now obsoleted and will be completely removed in future versions. "
"See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
"See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4"
)
def init_sims(self, replace=False):
"""
@@ -699,11 +690,11 @@ def load_facebook_model(path, encoding='utf-8'):
>>> cap_path = datapath("crime-and-punishment.bin")
>>> fb_model = load_facebook_model(cap_path)
>>>
>>> 'landlord' in fb_model.wv.vocab # Word is out of vocabulary
>>> 'landlord' in fb_model.wv.key_to_index # Word is out of vocabulary
False
>>> oov_term = fb_model.wv['landlord']
>>>
>>> 'landlady' in fb_model.wv.vocab # Word is in the vocabulary
>>> 'landlady' in fb_model.wv.key_to_index # Word is in the vocabulary
True
>>> iv_term = fb_model.wv['landlady']
>>>
@@ -764,11 +755,11 @@ def load_facebook_vectors(path, encoding='utf-8'):
>>> cap_path = datapath("crime-and-punishment.bin")
>>> fbkv = load_facebook_vectors(cap_path)
>>>
>>> 'landlord' in fbkv.vocab # Word is out of vocabulary
>>> 'landlord' in fbkv.key_to_index # Word is out of vocabulary
False
>>> oov_vector = fbkv['landlord']
>>>
>>> 'landlady' in fbkv.vocab # Word is in the vocabulary
>>> 'landlady' in fbkv.key_to_index # Word is in the vocabulary
True
>>> iv_vector = fbkv['landlady']
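
As a usage note (editorial, not part of this diff): `load_facebook_vectors` returns a `FastTextKeyedVectors` instance, so the usual KeyedVectors queries should apply to it, for example:

.. sourcecode:: pycon

    >>> similar = fbkv.most_similar('landlady', topn=3)  # sketch; nearest neighbours of an in-vocab word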

@@ -1193,7 +1184,7 @@ def recalc_char_ngram_buckets(self):
Scan the vocabulary, calculate ngrams and their hashes, and cache the list of ngrams for each known word.

"""
# FIXME: evaluate if precaching even necessary, compared to recalculating as needed
# TODO: evaluate if precaching even necessary, compared to recalculating as needed.
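# Editorial note (not in the diff): buckets_word caches, for each vocabulary word, the uint32
# array of hashed char-ngram bucket indices, so training does not have to re-hash every
# word's ngrams on the fly.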
if self.bucket == 0:
self.buckets_word = [np.array([], dtype=np.uint32)] * len(self.index_to_key)
return