Source code for pororo.tasks.collocation

"""Collocation related modeling class"""

from typing import Optional

from pororo.tasks.utils.base import PororoFactoryBase, PororoSimpleBase
from pororo.tasks.utils.download_utils import download_or_load


class PororoCollocationFactory(PororoFactoryBase):
    """
    Conduct collocation search using index file

    English (`collocate.en`)

        - dataset: enwiki-20180420
        - metric: N/A

    Korean (`kollocate`)

        - dataset: kowiki-20200720
        - metric: N/A

    Chinese (`collocate.zh`)

        - dataset: zhwiki-20180420
        - metric: N/A

    Japanese (`collocate.ja`)

        - dataset: jawiki-20180420
        - metric: N/A

    Args:
        text (str): text to be inputted for collocation search

    Returns:
        dict: searched collocation split by part of speech

    Examples:
        >>> col = Pororo(task="col", lang="ko")
        >>> col("먹")
        먹 as verb
        noun 것(39), 수(29), 음식(23), 등(16), 고기(14), ..
        verb 하(33), 않(21), 살(17), 즐기(11), 굽(9), ..
        adverb 많이(10), 주로(7), 다(5), 같이(4), 잘(4), ...
        determiner 다른(5), 그(2), 여러(1), 세(1), 몇몇(1), 새(1)
        adjective 싶(5), 어리(1), 편하(1), 작(1), 좋(1), 손쉽(1), 못하(1)
        먹 as noun
        noun 붓(3), 종이(2), 묘선(1), 청자(1), 은장도(1), 제조(1), ..
        verb 의하(1), 그리(1), 찍(1), 차(1), 늘어놓(1)
        adverb 하지만(1)
        >>> col = Pororo(task="collocation", lang="ja")
        >>> col("東京")
        {'noun': {'noun': [('都', 137), ('家', 21), ('年', 18), ('府', 17), ('市', 12), ('式', 12), ('デザイナー', 10), ('日', 10), ('都立', 9), ('県', 9), ('出身', 8), ('証券', 8), ('後', 6)]}}
        >>> col = Pororo(task="col", lang="en")
        >>> col("george")
        {'noun': {'noun': [('washington', 13), ('gen.', 7)]}}
        >>> col = Pororo(task="col", lang="zh")
        >>> col("世界杯")
        {'noun': {'noun': [('2002年', 72), ('足球赛', 71), ('冠军', 53), ('2006年', 39), ('決賽', 35), ('决赛', 30), ('1998年', 26), ('外圍賽', 25), ('2010年', 23), ('2018年', 22), ('冠軍', 21), ...}}

    """

    def __init__(self, task: str, lang: str, model: Optional[str]):
        super().__init__(task, lang, model)

    @staticmethod
    def get_available_langs():
        """Return the language codes supported by the collocation task."""
        return ["ko", "en", "ja", "zh"]

    @staticmethod
    def get_available_models():
        """Return a mapping of language code to its available model names."""
        return {
            "ko": ["kollocate"],
            "en": ["collocate.en"],
            "ja": ["collocate.ja"],
            "zh": ["collocate.zh"],
        }

    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information (not used by this loader)

        Returns:
            object: User-selected task-specific model wrapped in
                `PororoCollocate`, or None when the configured model name
                matches none of the branches below

        Raises:
            ModuleNotFoundError: if the `kollocate` model is selected but
                the `kollocate` package is not installed

        """
        if self.config.n_model == "kollocate":
            # Optional dependency: imported lazily so it is only required
            # when the Korean model is actually requested.
            try:
                from kollocate import Kollocate
            except ModuleNotFoundError as error:
                # Same exception type and message as before; `from error`
                # keeps the original import failure as the explicit cause.
                raise error.__class__(
                    "Please install kollocate with: `pip install kollocate`"
                ) from error

            model = Kollocate()
            return PororoCollocate(model, self.config)

        if "collocate" in self.config.n_model:
            from pororo.models.collocate import Collocate

            # The index archive is selected by language code.
            index_path = download_or_load(
                f"misc/collocate.{self.config.lang}.zip",
                self.config.lang,
            )
            model = Collocate(index_path)
            return PororoCollocate(model, self.config)

        # No branch matched: keep the historical fall-through result.
        return None


class PororoCollocate(PororoSimpleBase):
    """Thin task wrapper that delegates collocation search to a model."""

    def __init__(self, model, config):
        super().__init__(config)
        # Callable collocation backend; invoked directly in `predict`.
        self._model = model

    def predict(self, text: str, **kwargs) -> dict:
        """
        Conduct collocation search using index file

        Args:
            text (str): text to be inputted for collocation search
            **kwargs: accepted for interface compatibility; ignored here

        Returns:
            dict: searched collocation split by part of speech
                (the value is returned unchanged from the wrapped model)

        """
        return self._model(text)