From 068df740bd73b95e9a1e233e47608df942fda9da Mon Sep 17 00:00:00 2001 From: Sagor Sarker Date: Sat, 22 Aug 2020 22:13:21 +0600 Subject: [PATCH] added model_card for model codeswitch-hineng-lid-lince and codeswitch-spaeng-lid-lince (#6663) * Create README.md * Update README.md * Create README.md * Update README.md --- .../codeswitch-hineng-lid-lince/README.md | 37 +++++++++++++++++++ .../codeswitch-spaeng-lid-lince/README.md | 36 ++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 model_cards/sagorsarker/codeswitch-hineng-lid-lince/README.md create mode 100644 model_cards/sagorsarker/codeswitch-spaeng-lid-lince/README.md diff --git a/model_cards/sagorsarker/codeswitch-hineng-lid-lince/README.md b/model_cards/sagorsarker/codeswitch-hineng-lid-lince/README.md new file mode 100644 index 00000000000..f7da9444a45 --- /dev/null +++ b/model_cards/sagorsarker/codeswitch-hineng-lid-lince/README.md @@ -0,0 +1,37 @@ +--- +language: +- hi +- en +--- + +# codeswitch-hineng-lid-lince +This is a pretrained model for **language identification** of `hindi-english` code-mixed data. The data used is from [LinCE](https://ritual.uh.edu/lince/home). + +## Identify Language + +* Method-1 + +```py + +from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline + +tokenizer = AutoTokenizer.from_pretrained("sagorsarker/codeswitch-hineng-lid-lince") + +model = AutoModelForTokenClassification.from_pretrained("sagorsarker/codeswitch-hineng-lid-lince") +lid_model = pipeline('ner', model=model, tokenizer=tokenizer) + +lid_model("put any hindi english code-mixed sentence") + +``` + +* Method-2 + +```py +# !pip install codeswitch +from codeswitch.codeswitch import LanguageIdentification +lid = LanguageIdentification('hin-eng') +text = "" # your code-mixed sentence +result = lid.identify(text) +print(result) +``` + diff --git a/model_cards/sagorsarker/codeswitch-spaeng-lid-lince/README.md b/model_cards/sagorsarker/codeswitch-spaeng-lid-lince/README.md new file mode 100644 index 
00000000000..451364bf29b --- /dev/null +++ b/model_cards/sagorsarker/codeswitch-spaeng-lid-lince/README.md @@ -0,0 +1,36 @@ +--- +language: +- es +- en +--- + +# codeswitch-spaeng-lid-lince +This is a pretrained model for **language identification** of `spanish-english` code-mixed data. The data used is from [LinCE](https://ritual.uh.edu/lince/home). + +## Identify Language + +* Method-1 + +```py + +from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline + +tokenizer = AutoTokenizer.from_pretrained("sagorsarker/codeswitch-spaeng-lid-lince") + +model = AutoModelForTokenClassification.from_pretrained("sagorsarker/codeswitch-spaeng-lid-lince") +lid_model = pipeline('ner', model=model, tokenizer=tokenizer) + +lid_model("put any spanish english code-mixed sentence") + +``` + +* Method-2 + +```py +# !pip install codeswitch +from codeswitch.codeswitch import LanguageIdentification +lid = LanguageIdentification('spa-eng') +text = "" # your code-mixed sentence +result = lid.identify(text) +print(result) +```