Source code for pororo.tasks.semantic_role_labeling

"""Semantic Role Labeling related modeling class"""

from copy import deepcopy
from typing import List, Optional, Tuple

from pororo.tasks.utils.base import PororoFactoryBase, PororoSimpleBase


class PororoSrlFactory(PororoFactoryBase):
    """
    Conduct semantic role labeling

    Korean (`charbert.base.ko.srl`)

        - dataset: UCorpus
        - metric: TBU
        - ref: http://nlplab.ulsan.ac.kr/doku.php?id=start

    Args:
        sent (str): input sentence to be semantic-role-labeled

    Returns:
        List[List[Tuple[str, str]]]: one (token, semantic role) list per detected predicate

    Examples:
        >>> srl = Pororo(task="srl", lang="ko")
        >>> srl("카터는 역삼에서 카카오브레인으로 출근한다.")
        [[('카터는', 'AGT'), ('역삼에서', 'LOC'), ('카카오브레인으로', 'GOL'), ('출근한다.', 'PREDICATE')]]
        >>> srl("피고인은 거제에서 400만 원 상당의 순금목걸이를 피해자로부터 강취하였다.")
        [[('피고인은', 'AGT'), ('거제에서', '-'), ('400만', '-'), ('원', '-'), ('상당의', '-'), ('순금목걸이를', 'THM'), ('피해자로부터', 'SRC'), ('강취하였다.', 'PREDICATE')]]

    """

    def __init__(self, task: str, lang: str, model: Optional[str]):
        super().__init__(task, lang, model)
    @staticmethod
    def get_available_langs():
        return ["ko"]

    @staticmethod
    def get_available_models():
        return {"ko": ["charbert.base.ko.srl"]}
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "charbert" in self.config.n_model:
            from pororo.models.brainbert import RobertaLabelModel
            from pororo.tasks import PororoPosFactory

            model = RobertaLabelModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device)

            tagger = PororoPosFactory(
                task="pos",
                model="mecab-ko",
                lang=self.config.lang,
            ).load(device)

            return PororoBertSRL(model, tagger, self.config)
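In practice this factory is reached through the top-level Pororo entry point, as the docstring examples above show. For reference, here is a minimal sketch of driving the factory directly; the device string and the local availability of the pretrained charbert.base.ko.srl weights are assumptions, not guarantees of this page.

# Sketch only: direct factory use instead of Pororo().
# Assumes the charbert.base.ko.srl weights can be downloaded;
# pass "cuda" instead of "cpu" for GPU inference.
from pororo.tasks.semantic_role_labeling import PororoSrlFactory

factory = PororoSrlFactory(task="srl", lang="ko", model="charbert.base.ko.srl")
srl = factory.load("cpu")  # builds the RobertaLabelModel plus the mecab-ko PoS tagger

print(srl.predict("카터는 역삼에서 카카오브레인으로 출근한다."))
# Expected shape: one (token, role) list per detected predicate, e.g.
# [[('카터는', 'AGT'), ('역삼에서', 'LOC'), ('카카오브레인으로', 'GOL'), ('출근한다.', 'PREDICATE')]]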
class PororoBertSRL(PororoSimpleBase):

    def __init__(self, model, tagger, config):
        super().__init__(config)
        self._tagger = tagger
        self._model = model
        self._verbs = ["VV", "VA", "XSV", "XSA", "VCN"]

    def _split_list(self, lst: List, separator: str):
        """
        Split list using separator

        Args:
            lst (list): PoS tagger pair list
            separator (str): separator token

        Returns:
            list: list of lists split on the separator

        """
        res = []
        tmp = []
        for elem in lst:
            if elem[0] == separator:
                res.append(tmp)
                tmp = []
                continue
            tmp.append(elem)
        res.append(tmp)
        return res

    def _preprocess(self, sent: str) -> List[Tuple[str, str]]:
        """
        Preprocess semantic role labeling input to specify predicate

        Args:
            sent (str): input sentence

        Returns:
            List[Tuple[str, str]]: one (character-spaced sentence, per-character tag segment) pair per detected predicate

        """
        words = self._split_list([list(tag) for tag in self._tagger(sent)], " ")

        # collect indices of words containing a predicate-like morpheme
        vs = []
        for i, word in enumerate(words):
            for morph in word:
                if morph[1] in self._verbs:
                    vs.append(i)
                    break

        # build one (sentence, segment) pair per predicate,
        # marking the predicate word with a leading ★
        sents = []
        for v in vs:
            morphs = deepcopy(words)
            morphs[v][0][0] = f"★{morphs[v][0][0]}"

            sent, seg = str(), str()
            for elems in morphs:
                for pair in elems:
                    morph, tag = pair
                    tag = f"{tag} "

                    if morph == " ":
                        sent += "▁ "
                        seg += tag
                        continue

                    chars = [c for c in morph]
                    sent += f"{' '.join(chars)} "
                    seg += tag * len(chars)
                sent += "▁ "
                seg += "SPACE "
            sents.append((sent.strip(), seg.strip()))

        return sents

    def _postprocess(self, result: List, origin: str):
        """
        Postprocess semantic role labeling model inference result

        Args:
            result (List): inferred semantic roles
            origin (str): original query string

        Returns:
            List[Tuple]: postprocessed result

        """
        tokens = origin.split()

        fin = []
        for res in result:
            res = self._split_list(res, "▁")

            tmp = []
            for i, token in enumerate(tokens):
                if "★" in res[i][0][0]:
                    tmp.append((token, "PREDICATE"))
                    continue
                tmp.append((token, res[i][0][1]))
            fin.append(tmp)
        return fin
    def predict(self, sent: str, **kwargs):
        """
        Conduct semantic role labeling

        Args:
            sent (str): input sentence to be semantic-role-labeled

        Returns:
            List[List[Tuple[str, str]]]: one (token, semantic role) list per detected predicate

        """
        preproc = self._preprocess(sent)

        if not preproc:
            return "There is NO predicate to be labeled"

        res = []
        for p in preproc:
            res.append(self._model.predict_srl(p[0], p[1]))

        return self._postprocess(res, sent)
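The character-level alignment built by _preprocess is the least obvious step above, so the following standalone sketch reproduces just that inner loop. The (morph, tag) pairs are hypothetical stand-ins for real mecab-ko output; only the alignment mechanics are taken from the method itself.

# Sketch only: hypothetical (morph, tag) pairs standing in for mecab-ko
# output; the loop mirrors the inner part of _preprocess above.
word_morphs = [
    [["카터", "NNP"], ["는", "JX"]],   # first word
    [["★간다", "VV"], [".", "SF"]],    # predicate word, first morph marked with ★
]

sent, seg = "", ""
for word in word_morphs:
    for morph, tag in word:
        chars = list(morph)
        sent += " ".join(chars) + " "   # every character becomes its own token
        seg += f"{tag} " * len(chars)   # repeat the morph tag once per character
    sent += "▁ "                        # word boundary marker
    seg += "SPACE "

print(sent.strip())  # 카 터 는 ▁ ★ 간 다 . ▁
print(seg.strip())   # NNP NNP JX SPACE VV VV VV SF SPACE

The model consumes this (sentence, segment) pair, and _postprocess then re-splits the prediction on "▁" so each whitespace token of the original sentence receives the role predicted for its word, with the ★-marked word forced to the label PREDICATE.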