Source code for textacy.similarity.sequences

"""
Sequence-based Metrics
----------------------

:mod:`textacy.similarity.sequences`: Normalized similarity metrics built on
sequence-based algorithms that identify and measure the subsequences common to each.
"""
import difflib
from typing import Sequence


def matching_subsequences_ratio(
    seq1: Sequence[str], seq2: Sequence[str], **kwargs
) -> float:
    """
    Measure the similarity between two sequences of strings by finding
    contiguous matching subsequences without any "junk" elements and
    normalizing by the total number of elements.

    Args:
        seq1: First sequence of strings to compare.
        seq2: Second sequence of strings to compare.
        **kwargs: Passed through unchanged to :class:`difflib.SequenceMatcher`.
            isjunk: Optional[Callable[[str], bool]] = None
            autojunk: bool = True

    Returns:
        Similarity between ``seq1`` and ``seq2`` in the interval [0.0, 1.0],
        where larger values correspond to more similar sequences of strings.

    Reference:
        https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher.ratio
    """
    # All of the matching and normalization is delegated to the standard library;
    # the sequences are passed by keyword so that ``isjunk`` / ``autojunk`` in
    # ``kwargs`` cannot be confused with them positionally.
    matcher = difflib.SequenceMatcher(a=seq1, b=seq2, **kwargs)
    return matcher.ratio()