mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
[docstring] Fix docstring for ChineseCLIP
(#26880)
* Remove ChineseCLIPImageProcessor, ChineseCLIPTextConfig, ChineseCLIPVisionConfig from check_docstrings * Run fix_and_overwrite for ChineseCLIPImageProcessor, ChineseCLIPTextConfig, ChineseCLIPVisionConfig * Replace <fill_type> and <fill_docstring> in configuration_chinese_clip.py, image_processing_chinese_clip.py with type and docstring values --------- Co-authored-by: vignesh-raghunathan <vignesh_raghunathan@intuit.com>
This commit is contained in:
parent
574a538455
commit
816c2237c1
@ -75,8 +75,13 @@ class ChineseCLIPTextConfig(PretrainedConfig):
|
||||
The vocabulary size of the `token_type_ids` passed when calling [`ChineseCLIPModel`].
|
||||
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||
initializer_factor (`float`, *optional*, defaults to 1.0):
|
||||
A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
|
||||
testing).
|
||||
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
|
||||
The epsilon used by the layer normalization layers.
|
||||
pad_token_id (`int`, *optional*, defaults to 0):
|
||||
Padding token id.
|
||||
position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
|
||||
Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
|
||||
positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
|
||||
@ -177,10 +182,14 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
|
||||
Dimensionality of the encoder layers and the pooler layer.
|
||||
intermediate_size (`int`, *optional*, defaults to 3072):
|
||||
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
|
||||
projection_dim (`int`, *optional*, defaults to 512):
|
||||
Dimensionality of text and vision projection layers.
|
||||
num_hidden_layers (`int`, *optional*, defaults to 12):
|
||||
Number of hidden layers in the Transformer encoder.
|
||||
num_attention_heads (`int`, *optional*, defaults to 12):
|
||||
Number of attention heads for each attention layer in the Transformer encoder.
|
||||
num_channels (`int`, *optional*, defaults to 3):
|
||||
The number of input channels.
|
||||
image_size (`int`, *optional*, defaults to 224):
|
||||
The size (resolution) of each image.
|
||||
patch_size (`int`, *optional*, defaults to 32):
|
||||
@ -188,13 +197,13 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
|
||||
hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
|
||||
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
|
||||
`"relu"`, `"selu"`, `"gelu_new"` and `"quick_gelu"` are supported.
|
||||
layer_norm_eps (`float`, *optional*, defaults to 1e-5):
|
||||
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
|
||||
The epsilon used by the layer normalization layers.
|
||||
attention_dropout (`float`, *optional*, defaults to 0.0):
|
||||
The dropout ratio for the attention probabilities.
|
||||
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||
initializer_factor (`float``, *optional*, defaults to 1):
|
||||
initializer_factor (`float`, *optional*, defaults to 1.0):
|
||||
A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
|
||||
testing).
|
||||
Example:
|
||||
|
@ -59,7 +59,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
|
||||
Size of the image after resizing. The shortest edge of the image is resized to `size["shortest_edge"]`, with
|
||||
the longest edge resized to keep the input aspect ratio. Can be overridden by `size` in the `preprocess`
|
||||
method.
|
||||
resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
|
||||
resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
|
||||
Resampling filter to use if resizing the image. Can be overridden by `resample` in the `preprocess` method.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to center crop the image to the specified `crop_size`. Can be overridden by `do_center_crop` in the
|
||||
@ -73,7 +73,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
|
||||
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
|
||||
Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
|
||||
method.
|
||||
do_normalize:
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether to normalize the image. Can be overridden by `do_normalize` in the `preprocess` method.
|
||||
image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
|
||||
Mean to use if normalizing the image. This is a float or list of floats the length of the number of
|
||||
|
@ -121,10 +121,7 @@ OBJECTS_TO_IGNORE = [
|
||||
"CamembertTokenizerFast",
|
||||
"CanineModel",
|
||||
"CanineTokenizer",
|
||||
"ChineseCLIPImageProcessor",
|
||||
"ChineseCLIPTextConfig",
|
||||
"ChineseCLIPTextModel",
|
||||
"ChineseCLIPVisionConfig",
|
||||
"ClapTextConfig",
|
||||
"CodeGenConfig",
|
||||
"CodeGenTokenizer",
|
||||
|
Loading…
Reference in New Issue
Block a user