Skip to content

Utils

This file implements the following helper functions.

add_custom_token(nlp, spacial_token, token_attrs)

Add custom token

Parameters:

Name Type Description Default
nlp object

pipeline

required
spacial_token string

the string to specially tokenize

required
token_attrs list[dict]

a list of dicts, where each dict describes a token and its attributes.

required
Source code in app/utils.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def add_custom_token(nlp, spacial_token, token_attrs):
    """
    Register a special-case tokenization rule on the pipeline's tokenizer.

    Parameters
    ----------
    nlp : object
        spaCy-style pipeline whose tokenizer exposes ``add_special_case``
    spacial_token : string
        the exact string that should be tokenized by the special rule
    token_attrs : list[dict]
        one dict per resulting token, describing its attributes

    Returns
    -------
    None
    """
    # Delegate straight to the tokenizer; the rule only fires on an
    # exact match of `spacial_token`.
    tokenizer = nlp.tokenizer
    tokenizer.add_special_case(spacial_token, token_attrs)

generate_guid(k=6)

Generate random id

Parameters:

Name Type Description Default
k int

length of guid

6

Returns:

Name Type Description
guid str

random id

Source code in app/utils.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def generate_guid(k=6):
    """
    Generate a random identifier made of uppercase letters and digits.

    Parameters
    ----------
    k : int
        length of the generated id (default 6)

    Returns
    -------
    guid : str
        random id of length ``k``
    """
    # NOTE(review): `random` is not cryptographically secure; fine for
    # display ids, use `secrets` if these ever become security tokens.
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(k))

generate_openapi_json(openapi_version='3.1.0')

Generate and save openapi specification of the application

Parameters:

Name Type Description Default
openapi_version str

openapi version specifier

'3.1.0'

Returns:

Type Description
None
Source code in app/utils.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def generate_openapi_json(openapi_version="3.1.0"):
    """
    Generate and save the OpenAPI specification of the application.

    Reads ``--app_path`` (import path of the module exposing ``app``) and
    ``--server_url`` (default ``0.0.0.0:8000``) from the command line,
    builds the spec with FastAPI's ``get_openapi`` and writes it to
    ``openapi.json`` in the working directory.

    Parameters
    ----------
    openapi_version : str
        openapi version specifier

    Returns
    -------
    None
    """
    parser = ArgumentParser()
    parser.add_argument('--app_path')
    parser.add_argument('--server_url', default='0.0.0.0:8000')
    cli_args = parser.parse_args()

    # Import the FastAPI app object from the module named on the CLI.
    app = importlib.import_module(cli_args.app_path).app

    spec = get_openapi(
        title=app.title,
        openapi_version=openapi_version,
        version=app.version,
        description=app.description,
        routes=app.routes,
        servers=[{'url': cli_args.server_url}],
    )
    json_save('openapi.json', spec)

json_load(path, encoding='utf-8')

Load json files from path

Parameters:

Name Type Description Default
path str

path of the JSON file to load

required
encoding str

encoding type

'utf-8'

Returns:

Name Type Description
obj (Object) object
Source code in app/utils.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def json_load(path, encoding='utf-8'):
    """
    Load a JSON document from a file.

    Parameters
    ----------
    path : str
        path of the JSON file to read
    encoding : str
        text encoding used to read the file (default 'utf-8')

    Returns
    -------
    obj : object
        the deserialized object
    """
    with open(path, 'r', encoding=encoding) as handle:
        return json.load(handle)

json_save(path, obj)

Save supported files to json format

Parameters:

Name Type Description Default
path str

path to save object

required
obj (Object) object
required
Source code in app/utils.py
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def json_save(path, obj):
    """
    Save a JSON-serializable object to a file.

    Parameters
    ----------
    path : str
        path to save the object to
    obj : object
        JSON-serializable object

    Returns
    -------
    None
    """
    # Always write UTF-8 so output is stable regardless of locale.
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(obj, file)

pickle_load(path)

Load supported pickle format files

Parameters:

Name Type Description Default
path str: path of the pickle file to load
required

Returns:

Name Type Description
obj (Object) any object with any pickle supported format
Source code in app/utils.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def pickle_load(path):
    """
    Load an object from a pickle file.

    Parameters
    ----------
    path : str
        path of the pickle file to read

    Returns
    -------
    obj : object
        the unpickled object

    Notes
    -----
    Only call this on trusted files: unpickling can execute arbitrary code.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

pickle_save(path, obj)

Save supported files in pickle format

Parameters:

Name Type Description Default
path str: path to save object
required
obj object: any pickle serializable object
required
Source code in app/utils.py
32
33
34
35
36
37
38
39
40
41
42
43
44
def pickle_save(path, obj):
    """
    Serialize an object to a file in pickle format.

    Parameters
    ----------
    path : str
        path to save the object to
    obj : object
        any pickle-serializable object

    Returns
    -------
    None
    """
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)

any_next_words_form_swear_word(cur_word, words_indices, censor_words)

Return True, and the end index of the word in the text, if any word formed in words_indices is in CENSOR_WORDSET.

Parameters:

Name Type Description Default
cur_word str

current word

required
words_indices list[int]

words indices

required
censor_words list[str]

censor words

required

Returns:

Name Type Description
end_index int

last index of censor word

Source code in better_profanity/utils.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def any_next_words_form_swear_word(cur_word, words_indices, censor_words):
    """
    Check whether `cur_word` extended by following words forms a censored
    word.

    Parameters
    ----------
    cur_word : str
        current word
    words_indices : list[tuple[str, int]]
        (word, end_index) pairs; entries come in twos — the even entry
        holds the bare next word, the odd entry holds the same word with
        its trailing separators
    censor_words : collection[str]
        censored words (assumed lowercase — TODO confirm against caller)

    Returns
    -------
    tuple[bool, int]
        (True, end index of the matched word in the text) when a censored
        word is formed, otherwise (False, -1).
    """
    full_word_with_separators = cur_word.lower()

    # Walk the pairs: bare word at even index, separator-joined form at
    # the following odd index. Only the separator-joined concatenation is
    # matched against the censor list.
    for index in range(0, len(words_indices), 2):
        single_word, end_index = words_indices[index]
        word_with_separators, _ = words_indices[index + 1]
        # An empty bare word contributes nothing; skip the whole pair.
        if single_word == "":
            continue

        full_word_with_separators += word_with_separators.lower()
        if full_word_with_separators in censor_words:
            return True, end_index
    return False, -1

get_complete_path_of_file(filename)

Join the path of the current directory with the input filename.

Parameters:

Name Type Description Default
filename str

name of the file

required

Returns:

Name Type Description
filepath str

path of the file

Source code in better_profanity/utils.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
def get_complete_path_of_file(filename):
    """
    Resolve `filename` relative to the directory containing this module.

    Parameters
    ----------
    filename : str
        name of the file

    Returns
    -------
    filepath : str
        absolute path of the file next to this module
    """
    here = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(here, filename)

get_replacement_for_swear_word(censor_char)

Replace swear words

Parameters:

Name Type Description Default
censor_char str

censor character

required

Returns:

Type Description
censor_char : str

replaced censor character

Source code in better_profanity/utils.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def get_replacement_for_swear_word(censor_char, repeat=4):
    """
    Build the replacement string for a censored word.

    Parameters
    ----------
    censor_char : str
        character (or string) used for censoring
    repeat : int, optional
        how many times `censor_char` is repeated (default 4, matching the
        previous hard-coded behavior)

    Returns
    -------
    str
        `censor_char` repeated `repeat` times
    """
    return censor_char * repeat

read_wordlist(filename)

Return words from a wordlist file.

Parameters:

Name Type Description Default
filename str

name of the file

required

Returns:

Name Type Description
row str

words from a wordlist file

Source code in better_profanity/utils.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def read_wordlist(filename: str):
    """
    Yield the non-empty lines of a wordlist file, one word per line.

    Parameters
    ----------
    filename : str
        name of the file

    Yields
    ------
    str
        each non-empty line with its trailing newline removed
    """
    with open(filename, encoding="utf-8") as handle:
        for line in handle:
            word = line.rstrip('\n')
            if word:
                yield word