Source code for pororo.tasks.sentiment_analysis

"""Sentiment Analysis related modeling class"""

from typing import Dict, Optional, Union

from pororo.tasks.utils.base import PororoFactoryBase, PororoSimpleBase


class PororoSentimentFactory(PororoFactoryBase):
    """
    Classification based sentiment analysis using review corpora

    Korean (`brainbert.base.ko.shopping`)

        - dataset: Shopping review corpus
        - metric: Accuracy (95.00)
        - ref: https://github.com/bab2min/corpus/tree/master/sentiment

    Korean (`brainbert.base.ko.nsmc`)

        - dataset: Naver sentiment movie corpus
        - metric: Accuracy (90.84)
        - ref: https://github.com/e9t/nsmc

    Japanese (`jaberta.base.ja.sentiment`)

        - dataset: Internal data
        - metric: Accuracy (96.29)

    Examples:
        >>> sa = Pororo(task="sentiment", model="brainbert.base.ko.nsmc", lang="ko")
        >>> sa("배송이 버트 학습시키는 것 만큼 느리네요")
        'Negative'
        >>> sa("배송이 경량화되었는지 빠르네요")
        'Positive'
        >>> sa = Pororo(task="sentiment", lang="ja")
        >>> sa("日が暑くもイライラか。")  # It is so hot out that I am really annoyed.
        'Negative'
        >>> sa('日が良く散歩に行きたいです。')  # The weather is nice, so I want to go for a walk.
        'Positive'
        >>> sa = Pororo(task="sentiment", model="brainbert.base.ko.shopping", lang="ko")
        >>> sa("꽤 맘에 들었어요. 겉에서 봤을땐 허름?했는데 맛도 있고, 괜찮아요")
        'Positive'
        >>> sa("예약하고 가세요 대기줄이 깁니다 훠궈는 하이디라오가 비싼만큼 만족도가 제일 높아요")
        'Negative'
        >>> sa("이걸 산 내가 레전드", show_probs=True)
        {'negative': 0.7525266408920288, 'positive': 0.2474733293056488}

    """

    def __init__(self, task: str, lang: str, model: Optional[str]):
        # Nothing task-specific to set up; everything is forwarded to the base factory.
        super().__init__(task, lang, model)

    @staticmethod
    def get_available_langs():
        """Return the language codes this task can be created for."""
        return ["ko", "ja"]

    @staticmethod
    def get_available_models():
        """Return a mapping of language code to the model names offered for it."""
        korean_models = [
            "brainbert.base.ko.shopping",
            "brainbert.base.ko.nsmc",
        ]
        japanese_models = ["jaberta.base.ja.sentiment"]
        return {"ko": korean_models, "ja": japanese_models}

    def load(self, device: str):
        """
        Load user-selected task-specific model

        The backbone class is picked by substring match on the configured
        model name ("brainbert" is checked before "jaberta"); the loaded
        model is switched to eval mode and moved to `device`.

        Args:
            device (str): device information, handed to the model's `.to()`

        Returns:
            object: `PororoBertSentiment` wrapping the loaded model, or
                `None` when the model name matches neither backbone family

        """
        model_name = self.config.n_model

        # The backbone modules are imported lazily, only for the family in use.
        if "brainbert" in model_name:
            from pororo.models.brainbert import BrainRobertaModel as backbone_cls
        elif "jaberta" in model_name:
            from pororo.models.brainbert import JabertaModel as backbone_cls
        else:
            # No known backbone family matched: callers get None.
            return None

        backbone = backbone_cls.load_model(
            f"bert/{model_name}",
            self.config.lang,
        )
        backbone = backbone.eval().to(device)
        return PororoBertSentiment(backbone, self.config)


class PororoBertSentiment(PororoSimpleBase):
    """
    Sentiment predictor wrapping an already loaded classification model

    The wrapped model must provide `predict_output(sent, show_probs=...)`.
    Raw labels coming back from it are normalized through `_label_fn`.

    """

    def __init__(self, model, config):
        super().__init__(config)
        self._model = model
        # Raw model label -> normalized lowercase label. The identity entries
        # let labels that are already textual go through the same lookup as
        # the "0"/"1" labels.
        self._label_fn = {
            "0": "negative",
            "1": "positive",
            "negative": "negative",
            "positive": "positive",
        }

    def predict(self, sent: str, **kwargs) -> Union[str, Dict[str, float]]:
        """
        Conduct sentiment analysis

        Args:
            sent: (str) sentence to be sentiment analyzed
            show_probs: (bool) optional keyword argument; when true, the
                score of every label is returned instead of a single label
                (defaults to False)

        Returns:
            str: title-cased predicted label, e.g. `Negative` or `Positive`,
                when `show_probs` is false
            dict: normalized label name (`negative` / `positive`) mapped to
                its score, when `show_probs` is true

        Raises:
            KeyError: if the model yields a label missing from `_label_fn`
                on a path that performs the lookup

        """
        show_probs = kwargs.get("show_probs", False)
        res = self._model.predict_output(sent, show_probs=show_probs)

        if show_probs:
            # `res` is indexed by raw label here; re-key it by normalized label.
            return {self._label_fn[label]: res[label] for label in res}

        # Only Korean results are normalized through the label map; other
        # languages are title-cased as returned by the model.
        # NOTE(review): presumably only the Korean models emit "0"/"1" -- confirm.
        if self.config.lang == "ko":
            return self._label_fn[res].title()
        return res.title()