Source code for pororo.tasks.automated_essay_scoring

"""Automated Essay Scoring related modeling class"""

from typing import Optional

from pororo.tasks.utils.base import PororoFactoryBase, PororoSimpleBase


class PororoAesFactory(PororoFactoryBase):
    """
    Factory for regression based Automated Essay Scoring (AES)

    English (`roberta.base.en.aes`)

        - dataset: The Hewlett Foundation: Automated Essay Scoring
        - metric: Spearman (80.25)
        - ref: https://www.kaggle.com/c/asap-aes/data

    Examples:
        >>> aes = Pororo(task="aes", lang="en")
        >>> aes("To me, leadership does not necessarily mean accumulating as many titles as possible...")
        23.56

    """

    def __init__(self, task: str, lang: str, model: Optional[str]):
        # All three arguments are handed to the base factory unchanged
        super().__init__(task, lang, model)

    @staticmethod
    def get_available_langs():
        """Return the language codes this task supports"""
        return ["en"]

    @staticmethod
    def get_available_models():
        """Return the supported model names, keyed by language code"""
        return {"en": ["roberta.base.en.aes"]}

    def load(self, device):
        """
        Build the scorer for the model named in ``self.config``

        Args:
            device (str): device information, forwarded to the model's ``to()``

        Returns:
            object: a ``PororoBertAes`` wrapping the loaded model, or ``None``
                when the configured model name does not contain "roberta"

        """
        model_name = self.config.n_model
        if "roberta" not in model_name:
            # No other backbone is handled here, so nothing is constructed
            return None

        # Imported lazily, only when a roberta model is actually requested
        from pororo.models.brainbert import CustomRobertaModel

        roberta = CustomRobertaModel.load_model(
            f"bert/{model_name}",
            self.config.lang,
        )
        # Switch to eval mode first, then move to the requested device
        roberta = roberta.eval().to(device)

        return PororoBertAes(roberta, self.config)


class PororoBertAes(PororoSimpleBase):
    """Essay scorer backed by a model that provides ``encode`` and ``predict``"""

    def __init__(self, model, config):
        super().__init__(config)
        # The wrapped model is used as-is; predict() never modifies it here
        self._model = model

    def predict(self, sent: str, **kwargs):
        """
        Score an essay with the wrapped model

        Args:
            sent: (str) essay text to be encoded and scored

        Returns:
            float: predicted essay score (raw model output multiplied by 100
                and rounded to two decimal places)

        """
        # Only the first 1024 encoded tokens are passed to the model
        encoded = self._model.encode(sent)[:1024]

        logits = self._model.predict(
            "sentence_classification_head",
            encoded,
            return_logits=True,
        )

        # Squeeze the last axis and keep the first entry
        # (presumably a single-item batch with one regression output - confirm)
        raw_score = logits.squeeze(-1).tolist()[0]

        return round(raw_score * 100, 2)