mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-04 05:10:06 +06:00
Cleanup BatchFeature and BatchEncoding (#38459)
* Use dict comprehension to create dict
* Fix type annotation: `Union[Any]` doesn't really make any sense
* Remove methods that are already implemented in the `UserDict` parent class
This commit is contained in:
parent
8e5cefcb1e
commit
e508965df7
@ -75,7 +75,7 @@ class BatchFeature(UserDict):
|
|||||||
super().__init__(data)
|
super().__init__(data)
|
||||||
self.convert_to_tensors(tensor_type=tensor_type)
|
self.convert_to_tensors(tensor_type=tensor_type)
|
||||||
|
|
||||||
def __getitem__(self, item: str) -> Union[Any]:
|
def __getitem__(self, item: str) -> Any:
|
||||||
"""
|
"""
|
||||||
If the key is a string, returns the value of the dict associated to `key` ('input_values', 'attention_mask',
|
If the key is a string, returns the value of the dict associated to `key` ('input_values', 'attention_mask',
|
||||||
etc.).
|
etc.).
|
||||||
@ -98,18 +98,6 @@ class BatchFeature(UserDict):
|
|||||||
if "data" in state:
|
if "data" in state:
|
||||||
self.data = state["data"]
|
self.data = state["data"]
|
||||||
|
|
||||||
# Copied from transformers.tokenization_utils_base.BatchEncoding.keys
|
|
||||||
def keys(self):
|
|
||||||
return self.data.keys()
|
|
||||||
|
|
||||||
# Copied from transformers.tokenization_utils_base.BatchEncoding.values
|
|
||||||
def values(self):
|
|
||||||
return self.data.values()
|
|
||||||
|
|
||||||
# Copied from transformers.tokenization_utils_base.BatchEncoding.items
|
|
||||||
def items(self):
|
|
||||||
return self.data.items()
|
|
||||||
|
|
||||||
def _get_is_as_tensor_fns(self, tensor_type: Optional[Union[str, TensorType]] = None):
|
def _get_is_as_tensor_fns(self, tensor_type: Optional[Union[str, TensorType]] = None):
|
||||||
if tensor_type is None:
|
if tensor_type is None:
|
||||||
return None, None
|
return None, None
|
||||||
@ -218,7 +206,6 @@ class BatchFeature(UserDict):
|
|||||||
requires_backends(self, ["torch"])
|
requires_backends(self, ["torch"])
|
||||||
import torch # noqa
|
import torch # noqa
|
||||||
|
|
||||||
new_data = {}
|
|
||||||
device = kwargs.get("device")
|
device = kwargs.get("device")
|
||||||
non_blocking = kwargs.get("non_blocking", False)
|
non_blocking = kwargs.get("non_blocking", False)
|
||||||
# Check if the args are a device or a dtype
|
# Check if the args are a device or a dtype
|
||||||
@ -233,17 +220,19 @@ class BatchFeature(UserDict):
|
|||||||
else:
|
else:
|
||||||
# it's something else
|
# it's something else
|
||||||
raise ValueError(f"Attempting to cast a BatchFeature to type {str(arg)}. This is not supported.")
|
raise ValueError(f"Attempting to cast a BatchFeature to type {str(arg)}. This is not supported.")
|
||||||
|
|
||||||
# We cast only floating point tensors to avoid issues with tokenizers casting `LongTensor` to `FloatTensor`
|
# We cast only floating point tensors to avoid issues with tokenizers casting `LongTensor` to `FloatTensor`
|
||||||
for k, v in self.items():
|
def maybe_to(v):
|
||||||
# check if v is a floating point
|
# check if v is a floating point
|
||||||
if isinstance(v, torch.Tensor) and torch.is_floating_point(v):
|
if isinstance(v, torch.Tensor) and torch.is_floating_point(v):
|
||||||
# cast and send to device
|
# cast and send to device
|
||||||
new_data[k] = v.to(*args, **kwargs)
|
return v.to(*args, **kwargs)
|
||||||
elif isinstance(v, torch.Tensor) and device is not None:
|
elif isinstance(v, torch.Tensor) and device is not None:
|
||||||
new_data[k] = v.to(device=device, non_blocking=non_blocking)
|
return v.to(device=device, non_blocking=non_blocking)
|
||||||
else:
|
else:
|
||||||
new_data[k] = v
|
return v
|
||||||
self.data = new_data
|
|
||||||
|
self.data = {k: maybe_to(v) for k, v in self.items()}
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
@ -294,15 +294,6 @@ class BatchEncoding(UserDict):
|
|||||||
if "encodings" in state:
|
if "encodings" in state:
|
||||||
self._encodings = state["encodings"]
|
self._encodings = state["encodings"]
|
||||||
|
|
||||||
def keys(self):
|
|
||||||
return self.data.keys()
|
|
||||||
|
|
||||||
def values(self):
|
|
||||||
return self.data.values()
|
|
||||||
|
|
||||||
def items(self):
|
|
||||||
return self.data.items()
|
|
||||||
|
|
||||||
# After this point:
|
# After this point:
|
||||||
# Extended properties and methods only available for fast (Rust-based) tokenizers
|
# Extended properties and methods only available for fast (Rust-based) tokenizers
|
||||||
# provided by HuggingFace tokenizers library.
|
# provided by HuggingFace tokenizers library.
|
||||||
|
Loading…
Reference in New Issue
Block a user