Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-16 19:18:24 +06:00)
Add many missing spaces in adjacent strings (#26751)

Add the missing separating spaces (plus a couple of related punctuation fixes) where adjacent string literals are implicitly concatenated, so that the assembled help texts, warnings, and error messages no longer run words together.
parent: 3bc65505fc
commit: 40ea9ab2a1
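For context on the class of bug this commit fixes: Python implicitly concatenates adjacent string literals, so when a long message is split across several literals, one of them has to carry the separating space. A minimal sketch of the failure mode (not code from this commit):

broken = (
    "Sequences longer than this will be truncated,"  # no trailing space
    "sequences shorter will be padded."
)
fixed = (
    "Sequences longer than this will be truncated, "  # trailing space added
    "sequences shorter will be padded."
)
print(broken)  # "...truncated,sequences shorter..."  (words run together)
print(fixed)   # "...truncated, sequences shorter..."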
@@ -118,7 +118,7 @@ def parse_args():
         default=128,
         help=(
             "The maximum total sequence length for target text after "
-            "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
+            "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded "
             "during ``evaluate`` and ``predict``."
         ),
     )
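The fixed help text only surfaces once argparse joins the three literals into one sentence. A quick sketch of where that happens, using a hypothetical flag name in place of the real argument:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max_target_length",  # illustrative flag name, not taken from the script
    type=int,
    default=128,
    help=(
        "The maximum total sequence length for target text after "
        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded "
        "during ``evaluate`` and ``predict``."
    ),
)
print(parser.format_help())  # the three literals now read as a single sentence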
@@ -399,7 +399,7 @@ def main():
 
     # Log on each process the small summary:
     logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
     )
     # Set the verbosity to info of the Transformers logger (on main process only):
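The same trailing ", " is added to identical logger.warning calls in the next two hunks. A small sketch of how the two f-strings now combine, with a SimpleNamespace standing in for the real training_args and purely illustrative values:

from types import SimpleNamespace

training_args = SimpleNamespace(local_rank=0, device="cuda:0", n_gpu=1, fp16=True)
print(
    f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
    f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
# Without the trailing ", " the message ran together: "... n_gpu: 1distributed training: ..."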
@@ -354,7 +354,7 @@ def main():
 
     # Log on each process the small summary:
     logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
     )
     # Set the verbosity to info of the Transformers logger (on main process only):
@@ -455,7 +455,7 @@ def main():
 
     # Log on each process the small summary:
     logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
     )
     # Set the verbosity to info of the Transformers logger (on main process only):
@@ -116,7 +116,7 @@ class IdeficsVisionEmbeddings(nn.Module):
         if fp32_upcasting:
             logger.warning_once(
                 "Upcasting patch_pos_embed to fp32 for interpolation since `upsample_bicubic2d_out_frame` in nn.functional.interpolate "
-                "is not implemented for 'torch.bfloat16' dtype. This will result in a slight overhead"
+                "is not implemented for 'torch.bfloat16' dtype. This will result in a slight overhead."
             )
             patch_pos_embed = patch_pos_embed.to(torch.float)
         patch_pos_embed = nn.functional.interpolate(
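Beyond the added period, the surrounding code in this hunk shows a common workaround: bicubic interpolation is not implemented for bfloat16, so the tensor is upcast to float32 first. A hedged sketch of that pattern with made-up shapes; only the upcast-then-interpolate idea is taken from the hunk above:

import torch
import torch.nn as nn

patch_pos_embed = torch.randn(1, 16, 16, 64, dtype=torch.bfloat16)  # dummy NHWC embedding
fp32_upcasting = patch_pos_embed.dtype == torch.bfloat16
if fp32_upcasting:
    patch_pos_embed = patch_pos_embed.to(torch.float)  # upcast before bicubic interpolation

patch_pos_embed = nn.functional.interpolate(
    patch_pos_embed.permute(0, 3, 1, 2),  # NHWC -> NCHW, as interpolate expects
    scale_factor=(2.0, 2.0),
    mode="bicubic",
    align_corners=False,
)
if fp32_upcasting:
    patch_pos_embed = patch_pos_embed.to(torch.bfloat16)  # cast back after interpolation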
@@ -1774,13 +1774,13 @@ class SwitchTransformersForConditionalGeneration(SwitchTransformersPreTrainedModel):
 
             if reordered_layer_past_states[0].shape != layer_past_states[0].shape:
                 raise ValueError(
-                    "expected reordered_layer_past_states to have the same shape than layer_past_states"
+                    "expected reordered_layer_past_states to have the same shape than layer_past_states, "
                     f"but got {reordered_layer_past_states[0].shape} and {layer_past_states[0].shape}"
                 )
             if len(reordered_layer_past_states) != len(layer_past_states):
                 raise ValueError(
-                    "expected layer_past_states to have the same length as reordered_layer_past_states"
-                    f"got {len(layer_past_states)} and {len(reordered_layer_past_states)}"
+                    "expected layer_past_states to have the same length as reordered_layer_past_states, "
+                    f"but got {len(layer_past_states)} and {len(reordered_layer_past_states)}"
                 )
 
             reordered_decoder_past = reordered_decoder_past + (reordered_layer_past_states,)
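For completeness, a minimal sketch of the consistency checks these messages guard: after reordering cached decoder states along the beam dimension, both the shape of each state and the number of states must be unchanged. Tensor shapes are dummies, and the index_select-based reordering only mirrors the style of this kind of _reorder_cache method:

import torch

layer_past_states = tuple(torch.randn(4, 8, 16, 64) for _ in range(2))  # dummy cached states
beam_idx = torch.tensor([2, 0, 1, 3])  # new ordering of the beam/batch dimension

reordered_layer_past_states = tuple(
    state.index_select(0, beam_idx) for state in layer_past_states
)

if reordered_layer_past_states[0].shape != layer_past_states[0].shape:
    raise ValueError(
        "expected reordered_layer_past_states to have the same shape than layer_past_states, "
        f"but got {reordered_layer_past_states[0].shape} and {layer_past_states[0].shape}"
    )
if len(reordered_layer_past_states) != len(layer_past_states):
    raise ValueError(
        "expected layer_past_states to have the same length as reordered_layer_past_states, "
        f"but got {len(layer_past_states)} and {len(reordered_layer_past_states)}"
    )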