27 lines
818 B
Python
27 lines
818 B
Python
import argparse
|
|
|
|
from transformers import AutoTokenizer
|
|
|
|
|
|
def main(model, cachedir):
    """Fetch a tokenizer and write it to a directory.

    Args:
        model: Identifier handed to ``AutoTokenizer.from_pretrained``.
        cachedir: Destination handed to the tokenizer's ``save_pretrained``.
    """
    # Load and persist in one step; the tokenizer object is not needed afterwards.
    AutoTokenizer.from_pretrained(model).save_pretrained(cachedir)
    print(f"Tokenizer saved to {cachedir}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: both options are mandatory strings.
    cli = argparse.ArgumentParser(
        description="Download and save Hugging Face tokenizer",
    )
    for flag, help_text in (
        ("--model", "Name of the model"),
        ("--cachedir", "Directory to save the tokenizer"),
    ):
        cli.add_argument(flag, type=str, required=True, help=help_text)

    options = cli.parse_args()
    main(options.model, options.cachedir)
|