[Core][Bugfix] cache len of tokenizer (#3741)
This commit is contained in:
parent
991143cfcd
commit
203d4f82ac
@ -26,6 +26,7 @@ def get_cached_tokenizer(
|
|||||||
tokenizer_all_special_tokens_extended = (
|
tokenizer_all_special_tokens_extended = (
|
||||||
tokenizer.all_special_tokens_extended)
|
tokenizer.all_special_tokens_extended)
|
||||||
tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
|
tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
|
||||||
|
tokenizer_len = len(tokenizer)
|
||||||
|
|
||||||
class CachedTokenizer(tokenizer.__class__):
|
class CachedTokenizer(tokenizer.__class__):
|
||||||
|
|
||||||
@ -41,6 +42,9 @@ def get_cached_tokenizer(
|
|||||||
def all_special_tokens_extended(self):
|
def all_special_tokens_extended(self):
|
||||||
return tokenizer_all_special_tokens_extended
|
return tokenizer_all_special_tokens_extended
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return tokenizer_len
|
||||||
|
|
||||||
CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
|
CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
|
||||||
|
|
||||||
tokenizer.__class__ = CachedTokenizer
|
tokenizer.__class__ = CachedTokenizer
|
||||||
|
Loading…
x
Reference in New Issue
Block a user