Skip to content

Utils

This file implements the following helper functions.

add_custom_token(nlp, spacial_token, token_attrs)

Add custom token

Parameters:

Name Type Description Default
nlp object

pipeline

required
spacial_token string

the string to specially tokenize

required
token_attrs list[dict]

a list of dicts, where each dict describes a token and its attributes.

required
Source code in app/utils.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def add_custom_token(nlp, spacial_token, token_attrs):
    """
    Register a special-case tokenization rule on the pipeline's tokenizer.

    Parameters
    ----------
    nlp : object
        spaCy-style pipeline whose tokenizer exposes ``add_special_case``
    spacial_token : string
        the exact string that should be tokenized by the special rule
    token_attrs : list[dict]
        one dict per resulting token, describing its attributes

    Returns
    -------
    None
    """
    # Delegate straight to the tokenizer; the rule only fires on an
    # exact match of `spacial_token`.
    tokenizer = nlp.tokenizer
    tokenizer.add_special_case(spacial_token, token_attrs)

generate_guid(k=6)

Generate random id

Parameters:

Name Type Description Default
k int

length of guid

6

Returns:

Name Type Description
guid str

random id

Source code in app/utils.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def generate_guid(k=6):
    """
    Generate a random identifier made of uppercase letters and digits.

    Parameters
    ----------
    k : int
        length of the generated id (default 6)

    Returns
    -------
    guid : str
        random id of length ``k``
    """
    # NOTE(review): `random` is not cryptographically secure; fine for
    # display ids, use `secrets` if these ever become security tokens.
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(k))

generate_openapi_json(openapi_version='3.1.0')

Generate and save openapi specification of the application

Parameters:

Name Type Description Default
openapi_version str

openapi version specifier

'3.1.0'

Returns:

Type Description
None
Source code in app/utils.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def generate_openapi_json(openapi_version="3.1.0"):
    """
    Generate and save the OpenAPI specification of the application.

    Reads ``--app_path`` (import path of the module exposing ``app``) and
    ``--server_url`` (default ``0.0.0.0:8000``) from the command line,
    builds the spec with FastAPI's ``get_openapi`` and writes it to
    ``openapi.json`` in the working directory.

    Parameters
    ----------
    openapi_version : str
        openapi version specifier

    Returns
    -------
    None
    """
    parser = ArgumentParser()
    parser.add_argument('--app_path')
    parser.add_argument('--server_url', default='0.0.0.0:8000')
    cli_args = parser.parse_args()

    # Import the FastAPI app object from the module named on the CLI.
    app = importlib.import_module(cli_args.app_path).app

    spec = get_openapi(
        title=app.title,
        openapi_version=openapi_version,
        version=app.version,
        description=app.description,
        routes=app.routes,
        servers=[{'url': cli_args.server_url}],
    )
    json_save('openapi.json', spec)

json_load(path, encoding='utf-8')

Load json files from path

Parameters:

Name Type Description Default
path str

path of the JSON file to load

required
encoding str

encoding type

'utf-8'

Returns:

Name Type Description
obj (Object) object
Source code in app/utils.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def json_load(path, encoding='utf-8'):
    """
    Load a JSON document from a file.

    Parameters
    ----------
    path : str
        path of the JSON file to read
    encoding : str
        text encoding used to read the file (default 'utf-8')

    Returns
    -------
    obj : object
        the deserialized object
    """
    with open(path, 'r', encoding=encoding) as handle:
        return json.load(handle)

json_save(path, obj)

Save supported files to json format

Parameters:

Name Type Description Default
path str

path to save object

required
obj (Object) object
required
Source code in app/utils.py
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def json_save(path, obj):
    """
    Save a JSON-serializable object to a file.

    Parameters
    ----------
    path : str
        path to save the object to
    obj : object
        JSON-serializable object

    Returns
    -------
    None
    """
    # Always write UTF-8 so output is stable regardless of locale.
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(obj, file)

pickle_load(path)

Load supported pickle format files

Parameters:

Name Type Description Default
path str: path of the pickle file to load
required

Returns:

Name Type Description
obj (Object) any object with any pickle supported format
Source code in app/utils.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def pickle_load(path):
    """
    Load an object from a pickle file.

    Parameters
    ----------
    path : str
        path of the pickle file to read

    Returns
    -------
    obj : object
        the unpickled object

    Notes
    -----
    Only call this on trusted files: unpickling can execute arbitrary code.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

pickle_save(path, obj)

Save supported files in pickle format

Parameters:

Name Type Description Default
path str: path to save object
required
obj object: any pickle serializable object
required
Source code in app/utils.py
32
33
34
35
36
37
38
39
40
41
42
43
44
def pickle_save(path, obj):
    """
    Serialize an object to a file in pickle format.

    Parameters
    ----------
    path : str
        path to save the object to
    obj : object
        any pickle-serializable object

    Returns
    -------
    None
    """
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)

any_next_words_form_swear_word(cur_word, words_indices, censor_words)

Return True, and the end index of the word in the text, if any word formed in words_indices is in CENSOR_WORDSET.

Parameters:

Name Type Description Default
cur_word str

current word

required
words_indices list[int]

words indices

required
censor_words list[str]

censor words

required

Returns:

Name Type Description
end_index int

last index of censor word

Source code in better_profanity/utils.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def any_next_words_form_swear_word(cur_word, words_indices, censor_words):
    """
    Check whether `cur_word` extended by following words forms a censored
    word.

    Parameters
    ----------
    cur_word : str
        current word
    words_indices : list[tuple[str, int]]
        (word, end_index) pairs; entries come in twos — the even entry
        holds the bare next word, the odd entry holds the same word with
        its trailing separators
    censor_words : collection[str]
        censored words (assumed lowercase — TODO confirm against caller)

    Returns
    -------
    tuple[bool, int]
        (True, end index of the matched word in the text) when a censored
        word is formed, otherwise (False, -1).
    """
    full_word_with_separators = cur_word.lower()

    # Walk the pairs: bare word at even index, separator-joined form at
    # the following odd index. Only the separator-joined concatenation is
    # matched against the censor list.
    for index in range(0, len(words_indices), 2):
        single_word, end_index = words_indices[index]
        word_with_separators, _ = words_indices[index + 1]
        # An empty bare word contributes nothing; skip the whole pair.
        if single_word == "":
            continue

        full_word_with_separators += word_with_separators.lower()
        if full_word_with_separators in censor_words:
            return True, end_index
    return False, -1

get_complete_path_of_file(filename)

Join the path of the current directory with the input filename.

Parameters:

Name Type Description Default
filename str

name of the file

required

Returns:

Name Type Description
filepath str

path of the file

Source code in better_profanity/utils.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
def get_complete_path_of_file(filename):
    """
    Resolve `filename` relative to the directory containing this module.

    Parameters
    ----------
    filename : str
        name of the file

    Returns
    -------
    filepath : str
        absolute path of the file next to this module
    """
    here = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(here, filename)

get_replacement_for_swear_word(censor_char)

Replace swear words

Parameters:

Name Type Description Default
censor_char str

censor character

required

Returns:

Type Description
censor_char : str

replaced censor character

Source code in better_profanity/utils.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def get_replacement_for_swear_word(censor_char, repeat=4):
    """
    Build the replacement string for a censored word.

    Parameters
    ----------
    censor_char : str
        character (or string) used for censoring
    repeat : int, optional
        how many times `censor_char` is repeated (default 4, matching the
        previous hard-coded behavior)

    Returns
    -------
    str
        `censor_char` repeated `repeat` times
    """
    return censor_char * repeat

read_wordlist(filename)

Return words from a wordlist file.

Parameters:

Name Type Description Default
filename str

name of the file

required

Returns:

Name Type Description
row str

words from a wordlist file

Source code in better_profanity/utils.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def read_wordlist(filename: str):
    """
    Yield the non-empty lines of a wordlist file, one word per line.

    Parameters
    ----------
    filename : str
        name of the file

    Yields
    ------
    str
        each non-empty line with its trailing newline removed
    """
    with open(filename, encoding="utf-8") as handle:
        for line in handle:
            word = line.rstrip('\n')
            if word:
                yield word