Update changelog for 4.0.0 release (#2981)

* update changelog for 4.0.0 release
* fixup
* wip: cleaning up changelog
* extend + clean up changelog
* note the removal of deprecations in CHANGELOG
* finish CHANGELOG - except removed modules, pending info from @mpenkov
* CHANGELOG formatting fixes
* fix outdated docs - found while updating the migration guide
* update migration hyperlinks
* fixing fixable FIXMEs, in preparation for 4.0.0beta
* fixing iter + size in docstrings
* fix typo
* clean up logic & docs for KeyedVectors.save_word2vec_format
* flake8 fix
* py3k: `class X(object):` -> `class X:`
* work around issues with flake8-rst
* add issues without a PR
* improve changelog script
* simplify pagination
* more flake8-rst fixing

Co-authored-by: Radim Řehůřek <[email protected]>
piskvorky · Oct 28, 2020 · e4199cb · e4199cb
1 parent 60a8f7f
commit e4199cb
Show file tree

Hide file tree

Showing 54 changed files with 1,011 additions and 1,094 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/docs/src/auto_examples/core/run_corpora_and_vector_spaces.ipynb b/docs/src/auto_examples/core/run_corpora_and_vector_spaces.ipynb
@@ -152,7 +152,7 @@
       },
       "outputs": [],
       "source": [
-        "from smart_open import open  # for transparently opening remote files\n\n\nclass MyCorpus(object):\n    def __iter__(self):\n        for line in open('https://radimrehurek.com/gensim/mycorpus.txt'):\n            # assume there's one document per line, tokens separated by whitespace\n            yield dictionary.doc2bow(line.lower().split())"
+        "from smart_open import open  # for transparently opening remote files\n\n\nclass MyCorpus:\n    def __iter__(self):\n        for line in open('https://radimrehurek.com/gensim/mycorpus.txt'):\n            # assume there's one document per line, tokens separated by whitespace\n            yield dictionary.doc2bow(line.lower().split())"
       ]
     },
     {

diff --git a/docs/src/auto_examples/core/run_corpora_and_vector_spaces.py b/docs/src/auto_examples/core/run_corpora_and_vector_spaces.py
@@ -136,7 +136,7 @@
 from smart_open import open  # for transparently opening remote files
 
 
-class MyCorpus(object):
+class MyCorpus:
     def __iter__(self):
         for line in open('https://radimrehurek.com/gensim/mycorpus.txt'):
             # assume there's one document per line, tokens separated by whitespace

diff --git a/docs/src/auto_examples/core/run_corpora_and_vector_spaces.py.md5 b/docs/src/auto_examples/core/run_corpora_and_vector_spaces.py.md5
@@ -1 +1 @@
-c239d5c523ea2b3af1f6d4c6c51e7925
+6b98413399bca9fd1ed8fe420da85692
diff --git a/docs/src/auto_examples/core/run_corpora_and_vector_spaces.rst b/docs/src/auto_examples/core/run_corpora_and_vector_spaces.rst
@@ -159,10 +159,10 @@ between the questions and ids is called a dictionary:
 
  .. code-block:: none
 
-    2020-10-19 01:23:37,722 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
-    2020-10-19 01:23:37,722 : INFO : built Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...) from 9 documents (total 29 corpus positions)
-    2020-10-19 01:23:37,722 : INFO : saving Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...) under /tmp/deerwester.dict, separately None
-    2020-10-19 01:23:37,723 : INFO : saved /tmp/deerwester.dict
+    2020-10-28 00:52:02,550 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
+    2020-10-28 00:52:02,550 : INFO : built Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...) from 9 documents (total 29 corpus positions)
+    2020-10-28 00:52:02,550 : INFO : saving Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...) under /tmp/deerwester.dict, separately None
+    2020-10-28 00:52:02,552 : INFO : saved /tmp/deerwester.dict
     Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...)
 
 
@@ -244,11 +244,11 @@ therefore reads: in the document `"Human computer interaction"`, the words `comp
 
  .. code-block:: none
 
-    2020-10-19 01:23:38,012 : INFO : storing corpus in Matrix Market format to /tmp/deerwester.mm
-    2020-10-19 01:23:38,013 : INFO : saving sparse matrix to /tmp/deerwester.mm
-    2020-10-19 01:23:38,013 : INFO : PROGRESS: saving document #0
-    2020-10-19 01:23:38,016 : INFO : saved 9x12 matrix, density=25.926% (28/108)
-    2020-10-19 01:23:38,016 : INFO : saving MmCorpus index to /tmp/deerwester.mm.index
+    2020-10-28 00:52:02,830 : INFO : storing corpus in Matrix Market format to /tmp/deerwester.mm
+    2020-10-28 00:52:02,832 : INFO : saving sparse matrix to /tmp/deerwester.mm
+    2020-10-28 00:52:02,832 : INFO : PROGRESS: saving document #0
+    2020-10-28 00:52:02,834 : INFO : saved 9x12 matrix, density=25.926% (28/108)
+    2020-10-28 00:52:02,834 : INFO : saving MmCorpus index to /tmp/deerwester.mm.index
     [[(0, 1), (1, 1), (2, 1)], [(0, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 1)], [(2, 1), (5, 1), (7, 1), (8, 1)], [(1, 1), (5, 2), (8, 1)], [(3, 1), (6, 1), (7, 1)], [(9, 1)], [(9, 1), (10, 1)], [(9, 1), (10, 1), (11, 1)], [(4, 1), (10, 1), (11, 1)]]
 
 
@@ -276,7 +276,7 @@ only requires that a corpus must be able to return one document vector at a time
     from smart_open import open  # for transparently opening remote files
 
 
-    class MyCorpus(object):
+    class MyCorpus:
         def __iter__(self):
             for line in open('https://radimrehurek.com/gensim/mycorpus.txt'):
                 # assume there's one document per line, tokens separated by whitespace
@@ -334,7 +334,7 @@ then convert the tokens via a dictionary to their ids and yield the resulting sp
 
  .. code-block:: none
 
-    <__main__.MyCorpus object at 0x117e06828>
+    <__main__.MyCorpus object at 0x11e77bb38>
 
 
 
@@ -406,8 +406,8 @@ Similarly, to construct the dictionary without loading all texts into memory:
 
  .. code-block:: none
 
-    2020-10-19 01:23:38,980 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
-    2020-10-19 01:23:38,981 : INFO : built Dictionary(42 unique tokens: ['abc', 'applications', 'computer', 'for', 'human']...) from 9 documents (total 69 corpus positions)
+    2020-10-28 00:52:04,241 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
+    2020-10-28 00:52:04,243 : INFO : built Dictionary(42 unique tokens: ['abc', 'applications', 'computer', 'for', 'human']...) from 9 documents (total 69 corpus positions)
     Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...)
 
 
@@ -454,11 +454,11 @@ create a toy corpus of 2 documents, as a plain Python list
 
  .. code-block:: none
 
-    2020-10-19 01:23:39,099 : INFO : storing corpus in Matrix Market format to /tmp/corpus.mm
-    2020-10-19 01:23:39,100 : INFO : saving sparse matrix to /tmp/corpus.mm
-    2020-10-19 01:23:39,100 : INFO : PROGRESS: saving document #0
-    2020-10-19 01:23:39,101 : INFO : saved 2x2 matrix, density=25.000% (1/4)
-    2020-10-19 01:23:39,101 : INFO : saving MmCorpus index to /tmp/corpus.mm.index
+    2020-10-28 00:52:04,368 : INFO : storing corpus in Matrix Market format to /tmp/corpus.mm
+    2020-10-28 00:52:04,370 : INFO : saving sparse matrix to /tmp/corpus.mm
+    2020-10-28 00:52:04,370 : INFO : PROGRESS: saving document #0
+    2020-10-28 00:52:04,370 : INFO : saved 2x2 matrix, density=25.000% (1/4)
+    2020-10-28 00:52:04,370 : INFO : saving MmCorpus index to /tmp/corpus.mm.index
 
 
 
@@ -486,16 +486,16 @@ Other formats include `Joachim's SVMlight format <http://svmlight.joachims.org/>
 
  .. code-block:: none
 
-    2020-10-19 01:23:39,152 : INFO : converting corpus to SVMlight format: /tmp/corpus.svmlight
-    2020-10-19 01:23:39,153 : INFO : saving SvmLightCorpus index to /tmp/corpus.svmlight.index
-    2020-10-19 01:23:39,154 : INFO : no word id mapping provided; initializing from corpus
-    2020-10-19 01:23:39,154 : INFO : storing corpus in Blei's LDA-C format into /tmp/corpus.lda-c
-    2020-10-19 01:23:39,154 : INFO : saving vocabulary of 2 words to /tmp/corpus.lda-c.vocab
-    2020-10-19 01:23:39,154 : INFO : saving BleiCorpus index to /tmp/corpus.lda-c.index
-    2020-10-19 01:23:39,206 : INFO : no word id mapping provided; initializing from corpus
-    2020-10-19 01:23:39,207 : INFO : storing corpus in List-Of-Words format into /tmp/corpus.low
-    2020-10-19 01:23:39,207 : WARNING : List-of-words format can only save vectors with integer elements; 1 float entries were truncated to integer value
-    2020-10-19 01:23:39,207 : INFO : saving LowCorpus index to /tmp/corpus.low.index
+    2020-10-28 00:52:04,425 : INFO : converting corpus to SVMlight format: /tmp/corpus.svmlight
+    2020-10-28 00:52:04,426 : INFO : saving SvmLightCorpus index to /tmp/corpus.svmlight.index
+    2020-10-28 00:52:04,427 : INFO : no word id mapping provided; initializing from corpus
+    2020-10-28 00:52:04,427 : INFO : storing corpus in Blei's LDA-C format into /tmp/corpus.lda-c
+    2020-10-28 00:52:04,427 : INFO : saving vocabulary of 2 words to /tmp/corpus.lda-c.vocab
+    2020-10-28 00:52:04,427 : INFO : saving BleiCorpus index to /tmp/corpus.lda-c.index
+    2020-10-28 00:52:04,481 : INFO : no word id mapping provided; initializing from corpus
+    2020-10-28 00:52:04,481 : INFO : storing corpus in List-Of-Words format into /tmp/corpus.low
+    2020-10-28 00:52:04,482 : WARNING : List-of-words format can only save vectors with integer elements; 1 float entries were truncated to integer value
+    2020-10-28 00:52:04,482 : INFO : saving LowCorpus index to /tmp/corpus.low.index
 
 
 
@@ -518,9 +518,9 @@ Conversely, to load a corpus iterator from a Matrix Market file:
 
  .. code-block:: none
 
-    2020-10-19 01:23:39,260 : INFO : loaded corpus index from /tmp/corpus.mm.index
-    2020-10-19 01:23:39,262 : INFO : initializing cython corpus reader from /tmp/corpus.mm
-    2020-10-19 01:23:39,262 : INFO : accepted corpus with 2 documents, 2 features, 1 non-zero entries
+    2020-10-28 00:52:04,538 : INFO : loaded corpus index from /tmp/corpus.mm.index
+    2020-10-28 00:52:04,540 : INFO : initializing cython corpus reader from /tmp/corpus.mm
+    2020-10-28 00:52:04,540 : INFO : accepted corpus with 2 documents, 2 features, 1 non-zero entries
 
 
 
@@ -619,10 +619,10 @@ To save the same Matrix Market document stream in Blei's LDA-C format,
 
  .. code-block:: none
 
-    2020-10-19 01:23:39,634 : INFO : no word id mapping provided; initializing from corpus
-    2020-10-19 01:23:39,636 : INFO : storing corpus in Blei's LDA-C format into /tmp/corpus.lda-c
-    2020-10-19 01:23:39,636 : INFO : saving vocabulary of 2 words to /tmp/corpus.lda-c.vocab
-    2020-10-19 01:23:39,636 : INFO : saving BleiCorpus index to /tmp/corpus.lda-c.index
+    2020-10-28 00:52:04,921 : INFO : no word id mapping provided; initializing from corpus
+    2020-10-28 00:52:04,922 : INFO : storing corpus in Blei's LDA-C format into /tmp/corpus.lda-c
+    2020-10-28 00:52:04,923 : INFO : saving vocabulary of 2 words to /tmp/corpus.lda-c.vocab
+    2020-10-28 00:52:04,923 : INFO : saving BleiCorpus index to /tmp/corpus.lda-c.index
 
 
 
@@ -710,9 +710,9 @@ Optimize converting between corpora and NumPy/SciPy arrays?), see the :ref:`apir
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 0 minutes  2.979 seconds)
+   **Total running time of the script:** ( 0 minutes  4.010 seconds)
 
-**Estimated memory usage:**  39 MB
+**Estimated memory usage:**  40 MB
 
 
 .. _sphx_glr_download_auto_examples_core_run_corpora_and_vector_spaces.py:

diff --git a/docs/src/auto_examples/core/sg_execution_times.rst b/docs/src/auto_examples/core/sg_execution_times.rst
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:02.979** total execution time for **auto_examples_core** files:
+**00:04.010** total execution time for **auto_examples_core** files:
 
 +--------------------------------------------------------------------------------------------------------------+-----------+---------+
-| :ref:`sphx_glr_auto_examples_core_run_corpora_and_vector_spaces.py` (``run_corpora_and_vector_spaces.py``)   | 00:02.979 | 38.7 MB |
+| :ref:`sphx_glr_auto_examples_core_run_corpora_and_vector_spaces.py` (``run_corpora_and_vector_spaces.py``)   | 00:04.010 | 39.8 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+---------+
 | :ref:`sphx_glr_auto_examples_core_run_core_concepts.py` (``run_core_concepts.py``)                           | 00:00.000 | 0.0 MB  |
 +--------------------------------------------------------------------------------------------------------------+-----------+---------+

diff --git a/docs/src/auto_examples/tutorials/images/sphx_glr_run_word2vec_001.png b/docs/src/auto_examples/tutorials/images/sphx_glr_run_word2vec_001.png
diff --git a/docs/src/auto_examples/tutorials/images/thumb/sphx_glr_run_word2vec_thumb.png b/docs/src/auto_examples/tutorials/images/thumb/sphx_glr_run_word2vec_thumb.png
diff --git a/docs/src/auto_examples/tutorials/run_word2vec.ipynb b/docs/src/auto_examples/tutorials/run_word2vec.ipynb
@@ -177,7 +177,7 @@
       },
       "outputs": [],
       "source": [
-        "from gensim.test.utils import datapath\nfrom gensim import utils\n\nclass MyCorpus(object):\n    \"\"\"An interator that yields sentences (lists of str).\"\"\"\n\n    def __iter__(self):\n        corpus_path = datapath('lee_background.cor')\n        for line in open(corpus_path):\n            # assume there's one document per line, tokens separated by whitespace\n            yield utils.simple_preprocess(line)"
+        "from gensim.test.utils import datapath\nfrom gensim import utils\n\nclass MyCorpus:\n    \"\"\"An iterator that yields sentences (lists of str).\"\"\"\n\n    def __iter__(self):\n        corpus_path = datapath('lee_background.cor')\n        for line in open(corpus_path):\n            # assume there's one document per line, tokens separated by whitespace\n            yield utils.simple_preprocess(line)"
       ]
     },
     {

diff --git a/docs/src/auto_examples/tutorials/run_word2vec.py b/docs/src/auto_examples/tutorials/run_word2vec.py
@@ -197,8 +197,8 @@
 from gensim.test.utils import datapath
 from gensim import utils
 
-class MyCorpus(object):
-    """An interator that yields sentences (lists of str)."""
+class MyCorpus:
+    """An iterator that yields sentences (lists of str)."""
 
     def __iter__(self):
         corpus_path = datapath('lee_background.cor')

diff --git a/docs/src/auto_examples/tutorials/run_word2vec.py.md5 b/docs/src/auto_examples/tutorials/run_word2vec.py.md5
@@ -1 +1 @@
-559f9ed4b873b99bf4882096b146691d
+4598eccb1c465c724d8cfa99e216689d
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-c239d5c523ea2b3af1f6d4c6c51e7925
		+6b98413399bca9fd1ed8fe420da85692
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-559f9ed4b873b99bf4882096b146691d
		+4598eccb1c465c724d8cfa99e216689d