# Source code for textacy.preprocessing.pipeline

"""
Pipeline
--------

:mod:`textacy.preprocessing.pipeline`: Basic functionality for composing multiple
preprocessing steps into a single callable pipeline.
"""
from typing import Callable

from cytoolz import functoolz


def make_pipeline(*funcs: Callable[[str], str]) -> Callable[[str], str]:
    """
    Make a callable pipeline that takes a text as input, passes it through one or more
    functions in sequential order, then outputs a single (preprocessed) text string.

    This function is intended as a lightweight convenience for users, allowing them
    to flexibly specify which (and in which order) preprocessing functions are to be
    applied to raw texts, then treating the whole thing as a single callable.

    .. code-block:: pycon

        >>> from textacy import preprocessing
        >>> preproc = preprocessing.make_pipeline(
        ...     preprocessing.replace.hashtags,
        ...     preprocessing.replace.user_handles,
        ...     preprocessing.replace.emojis,
        ... )
        >>> preproc("@spacy_io is OSS for industrial-strength NLP in Python developed by @explosion_ai 💥")
        '_USER_ is OSS for industrial-strength NLP in Python developed by _USER_ _EMOJI_'
        >>> preproc("hacking with my buddy Isaac Mewton 🥰 #PawProgramming")
        'hacking with my buddy Isaac Mewton _EMOJI_ _TAG_'

    Args:
        *funcs: Preprocessing functions, each accepting a text string and
            returning a text string, given in the order they are to be applied.

    Returns:
        Pipeline composed of ``*funcs`` that applies each in sequential order.
    """
    # compose_left chains the callables left-to-right, so the first function
    # passed is the first one applied to the input text.
    return functoolz.compose_left(*funcs)