mirror of
https://github.com/huggingface/transformers.git
synced 2025-08-02 03:01:07 +06:00
Fix group_lengths for short datasets (#12558)
This commit is contained in:
parent
0a6b9048d1
commit
6f1adc4334
@ -398,6 +398,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= block_size:
|
||||
total_length = (total_length // block_size) * block_size
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -431,6 +431,7 @@ if __name__ == "__main__":
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= max_seq_length:
|
||||
total_length = (total_length // max_seq_length) * max_seq_length
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -541,6 +541,7 @@ if __name__ == "__main__":
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= expanded_inputs_length:
|
||||
total_length = (total_length // expanded_inputs_length) * expanded_inputs_length
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -404,6 +404,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= block_size:
|
||||
total_length = (total_length // block_size) * block_size
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -343,6 +343,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= block_size:
|
||||
total_length = (total_length // block_size) * block_size
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -433,6 +433,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= max_seq_length:
|
||||
total_length = (total_length // max_seq_length) * max_seq_length
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -387,6 +387,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= max_seq_length:
|
||||
total_length = (total_length // max_seq_length) * max_seq_length
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -406,6 +406,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= max_seq_length:
|
||||
total_length = (total_length // max_seq_length) * max_seq_length
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -405,6 +405,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= block_size:
|
||||
total_length = (total_length // block_size) * block_size
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
@ -466,6 +466,7 @@ def main():
|
||||
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
||||
# customize this part to your needs.
|
||||
if total_length >= max_seq_length:
|
||||
total_length = (total_length // max_seq_length) * max_seq_length
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
|
Loading…
Reference in New Issue
Block a user