Исходный код recs_searcher.augmentation._word_aug
import re
from typing import List, Optional, Literal
import numpy as np
from ._base import BaseAugmentation
from ._actions import WORD_ACTIONS
[документация]
class WordAugmentation(BaseAugmentation):
"""Augmentation at the level of words."""
def __init__(
self,
unit_prob: float = 0.3,
min_aug: int = 1,
max_aug: int = 5,
action: Optional[Literal["delete", "swap", "split"]] = None,
seed: int = None,
) -> None:
super().__init__(
min_aug=min_aug,
max_aug=max_aug,
seed=seed,
)
self.unit_prob = unit_prob
if action is None:
self.action = np.random.choice(WORD_ACTIONS)
else:
self.action = action
@property
def actions_list(self) -> List[str]:
"""
Returns:
List[str]: A list of possible methods.
"""
return WORD_ACTIONS
def __split(self, word: str) -> str:
"""Divides a word character-by-character.
Args:
word (str): A word with the correct spelling.
Returns:
str: Word with spaces.
"""
word = " ".join(list(word))
return word
def __delete(self) -> str:
"""Deletes a random word.
Returns:
str: Empty string.
"""
return ""
[документация]
def _transform(self, array: List[str]) -> List[str]:
transformed_array = []
for text in array:
aug_sent_arr = text.split()
aug_idxs = self._aug_indexing(aug_sent_arr, self.unit_prob, clip=True)
for idx in aug_idxs:
if self.action == "delete":
aug_sent_arr[idx] = self.__delete()
elif self.action == "swap":
swap_idx = np.random.randint(0, len(aug_sent_arr) - 1)
aug_sent_arr[swap_idx], aug_sent_arr[idx] = (
aug_sent_arr[idx],
aug_sent_arr[swap_idx],
)
elif self.action == "split":
aug_sent_arr[idx] = self.__split(aug_sent_arr[idx])
else:
raise NameError(
"""These type of augmentation is not available, please check EDAAug.actions_list() to see
available augmentations"""
)
text = re.sub(" +", " ", " ".join(aug_sent_arr).strip())
transformed_array.append(text)
return transformed_array