mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-27 08:18:58 +06:00
added model_card for model codeswitch-hineng-lid-lince and codeswitch-spaeng-lid-lince (#6663)
* Create README.md * Update README.md * Create README.md * Update README.md
This commit is contained in:
parent
97bb2497ab
commit
068df740bd
@ -0,0 +1,37 @@
|
|||||||
|
---
|
||||||
|
language:
|
||||||
|
- hi
|
||||||
|
- en
|
||||||
|
---
|
||||||
|
|
||||||
|
# codeswitch-hineng-lid-lince
|
||||||
|
This is a pretrained model for **language identification** of `hindi-english` code-mixed text, using data from [LinCE](https://ritual.uh.edu/lince/home).
|
||||||
|
|
||||||
|
## Identify Language
|
||||||
|
|
||||||
|
* Method-1
|
||||||
|
|
||||||
|
```py
|
||||||
|
|
||||||
|
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
||||||
|
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained("sagorsarker/codeswitch-hineng-lid-lince")
|
||||||
|
|
||||||
|
model = AutoModelForTokenClassification.from_pretrained("sagorsarker/codeswitch-hineng-lid-lince")
|
||||||
|
lid_model = pipeline('ner', model=model, tokenizer=tokenizer)
|
||||||
|
|
||||||
|
lid_model("put any hindi english code-mixed sentence")
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
* Method-2
|
||||||
|
|
||||||
|
```py
|
||||||
|
# !pip install codeswitch
|
||||||
|
from codeswitch.codeswitch import LanguageIdentification
|
||||||
|
lid = LanguageIdentification('hin-eng')
|
||||||
|
text = "" # your code-mixed sentence
|
||||||
|
result = lid.identify(text)
|
||||||
|
print(result)
|
||||||
|
```
|
||||||
|
|
@ -0,0 +1,36 @@
|
|||||||
|
---
|
||||||
|
language:
|
||||||
|
- es
|
||||||
|
- en
|
||||||
|
---
|
||||||
|
|
||||||
|
# codeswitch-spaeng-lid-lince
|
||||||
|
This is a pretrained model for **language identification** of `spanish-english` code-mixed text, using data from [LinCE](https://ritual.uh.edu/lince/home).
|
||||||
|
|
||||||
|
## Identify Language
|
||||||
|
|
||||||
|
* Method-1
|
||||||
|
|
||||||
|
```py
|
||||||
|
|
||||||
|
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
||||||
|
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained("sagorsarker/codeswitch-spaeng-lid-lince")
|
||||||
|
|
||||||
|
model = AutoModelForTokenClassification.from_pretrained("sagorsarker/codeswitch-spaeng-lid-lince")
|
||||||
|
lid_model = pipeline('ner', model=model, tokenizer=tokenizer)
|
||||||
|
|
||||||
|
lid_model("put any spanish english code-mixed sentence")
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
* Method-2
|
||||||
|
|
||||||
|
```py
|
||||||
|
# !pip install codeswitch
|
||||||
|
from codeswitch.codeswitch import LanguageIdentification
|
||||||
|
lid = LanguageIdentification('spa-eng')
|
||||||
|
text = "" # your code-mixed sentence
|
||||||
|
result = lid.identify(text)
|
||||||
|
print(result)
|
||||||
|
```
|
Loading…
Reference in New Issue
Block a user