Merge pull request #427 from jeonsworld/patch-1

fix sample_doc
This commit is contained in:
Thomas Wolf 2019-04-03 11:26:58 +02:00 committed by GitHub
commit 9ca25ce828
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -49,7 +49,7 @@ class DocumentDatabase:
self._precalculate_doc_weights()
rand_start = self.doc_cumsum[current_idx]
rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx]
-sentence_index = randint(rand_start, rand_end) % self.cumsum_max
+sentence_index = randint(rand_start, rand_end-1) % self.cumsum_max
sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right')
else:
# If we don't use sentence weighting, then every doc has an equal chance to be chosen