You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Create a transformer ner model
Train it on data using the cfg and cli which auto-saves it
Create a new cfg file that points to your existing model
Try triggering the training using the CLI
You will get a missing config.json error
Here's the CLI:
python -m spacy train '/home/user/Coding/PatientHistory/refine_pt_hist_ner.cfg' --output '/home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024' --paths.train '/home/user/Coding/PatientHistory/train.spacy' --paths.dev '/home/user/Coding/PatientHistory/test.spacy' --gpu-id 0
Here's the output:
ℹ Saving to output directory:
/home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024
ℹ Using GPU: 0
=========================== Initializing pipeline ===========================
/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
_torch_pytree._register_pytree_node(
/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
_torch_pytree._register_pytree_node(
Traceback (most recent call last):
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/__main__.py", line 4, in <module>
setup_cli()
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/_util.py", line 87, in setup_cli
command(prog_name=COMMAND)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 54, in train_cli
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 81, in train
nlp = init_nlp(config, use_gpu=use_gpu)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/training/initialize.py", line 95, in init_nlp
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/language.py", line 1349, in initialize
proc.initialize(get_examples, nlp=self, **p_settings)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/pipeline_component.py", line 351, in initialize
self.model.initialize(X=docs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/thinc/model.py", line 318, in initialize
self.init(self, X=X, Y=Y)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 131, in init
hf_model = huggingface_from_pretrained(name, tok_cfg, trf_cfg)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 267, in huggingface_from_pretrained
tokenizer = tokenizer_cls.from_pretrained(str_path, **tok_config)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 752, in from_pretrained
config = AutoConfig.from_pretrained(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 1082, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 644, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 699, in _get_config_dict
resolved_config_file = cached_file(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/hub.py", line 360, in cached_file
raise EnvironmentError(
OSError: /home/user/Coding/PatientHistory/original_pt_hist_ner does not appear to have a file named config.json. Checkout 'https://huggingface.co//home/user/Coding/PatientHistory/original_pt_hist_ner/None' for available files.
The text was updated successfully, but these errors were encountered:
Discussed in #13394
Originally posted by jlustgarten March 23, 2024
How to reproduce the behaviour
Create a transformer ner model
Train it on data using the cfg and cli which auto-saves it
Create a new cfg file that points to your existing model
Try triggering the training using the CLI
You will get a missing config.json error
Your Environment
This is still occurring, with the same error text:
Config:
[paths]
train = null
dev = null
vectors = null
init_tok2vec = null
[system]
gpu_allocator = "pytorch"
seed = 0
[nlp]
lang = "en"
pipeline = ["transformer","ner"]
batch_size = 128
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
vectors = {"@vectors":"spacy.Vectors.v1"}
[components]
[components.ner]
factory = "ner"
incorrect_spans_key = null
moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = false
nO = null
[components.ner.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
pooling = {"@layers":"reduce_mean.v1"}
upstream = "*"
[components.transformer]
factory = "transformer"
max_batch_items = 4096
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v3"
name = "/home/user/Coding/PatientHistory/original_pt_hist_ner"
mixed_precision = false
[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96
[components.transformer.model.grad_scaler_config]
[components.transformer.model.tokenizer_config]
use_fast = true
[components.transformer.model.transformer_config]
[corpora]
[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[training]
accumulate_gradient = 4
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
patience = 2000
max_epochs = 0
max_steps = 80000
eval_frequency = 200
frozen_components = []
annotating_components = []
before_to_disk = null
before_update = null
[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = false
size = 2000
buffer = 256
get_length = null
[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false
[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 200000
initial_rate = 0.00005
[training.score_weights]
ents_f = 1.0
ents_p = 0.0
ents_r = 0.0
ents_per_type = null
[pretraining]
[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
[initialize.components]
[initialize.tokenizer]
Here's the CLI:
python -m spacy train '/home/user/Coding/PatientHistory/refine_pt_hist_ner.cfg' --output '/home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024' --paths.train '/home/user/Coding/PatientHistory/train.spacy' --paths.dev '/home/user/Coding/PatientHistory/test.spacy' --gpu-id 0
Here's the output:
ℹ Saving to output directory:
/home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024
ℹ Using GPU: 0
=========================== Initializing pipeline ===========================
/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
_torch_pytree._register_pytree_node(
/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
_torch_pytree._register_pytree_node(
Traceback (most recent call last):
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/__main__.py", line 4, in <module>
setup_cli()
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/_util.py", line 87, in setup_cli
command(prog_name=COMMAND)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 54, in train_cli
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 81, in train
nlp = init_nlp(config, use_gpu=use_gpu)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/training/initialize.py", line 95, in init_nlp
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/language.py", line 1349, in initialize
proc.initialize(get_examples, nlp=self, **p_settings)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/pipeline_component.py", line 351, in initialize
self.model.initialize(X=docs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/thinc/model.py", line 318, in initialize
self.init(self, X=X, Y=Y)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 131, in init
hf_model = huggingface_from_pretrained(name, tok_cfg, trf_cfg)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 267, in huggingface_from_pretrained
tokenizer = tokenizer_cls.from_pretrained(str_path, **tok_config)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 752, in from_pretrained
config = AutoConfig.from_pretrained(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 1082, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 644, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 699, in _get_config_dict
resolved_config_file = cached_file(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/hub.py", line 360, in cached_file
raise EnvironmentError(
OSError: /home/user/Coding/PatientHistory/original_pt_hist_ner does not appear to have a file named config.json. Checkout 'https://huggingface.co//home/user/Coding/PatientHistory/original_pt_hist_ner/None' for available files.
The text was updated successfully, but these errors were encountered: