"""Token-counting helpers built on tiktoken."""

import tiktoken

from src.config import ENCODING_FOR_MODEL, ENCODING

# Import-time call whose return value is discarded; kept so that importing
# this module behaves exactly as before.
# NOTE(review): presumably this validates ENCODING_FOR_MODEL or warms
# tiktoken's encoding cache -- confirm whether it is still needed.
tiktoken.encoding_for_model(ENCODING_FOR_MODEL)

# Encoder for the configured ENCODING, built once at import time and reused
# by tiktoken_len.
tokenizer = tiktoken.get_encoding(ENCODING)


def tiktoken_len(text: str) -> int:
    """Return the number of tokens in ``text`` using the module-level tokenizer.

    Args:
        text: The string to tokenize.

    Returns:
        The length of the token sequence produced by ``tokenizer.encode``.
    """
    # disallowed_special=() turns off tiktoken's special-token check, so text
    # that happens to contain e.g. "<|endoftext|>" is encoded as ordinary
    # text instead of raising ValueError.
    return len(tokenizer.encode(text, disallowed_special=()))


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens in ``string`` for the named encoding.

    Args:
        string: The text to tokenize.
        encoding_name: Name of a tiktoken encoding, passed to
            ``tiktoken.get_encoding``.

    Returns:
        The number of tokens ``string`` encodes to.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    # Same special-token handling as tiktoken_len: with the default
    # (disallowed_special="all") encode() raises ValueError when the input
    # contains a special-token literal, which makes a plain token count fail
    # on otherwise valid text.
    return len(encoding.encode(string, disallowed_special=()))