
Python: trying to reuse a previously trained tf.keras model as a pretrained base, but getting "ValueError: Input 0 of layer dense_3 is incompatible with the layer"


I created and trained a model that classifies beer names from invoice strings encoded as sequences of character integers.
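For context, a minimal, hypothetical sketch of the kind of preprocessing described above (the `invoice_text` column name, the use of `Tokenizer`, and the label handling are assumptions on my part, not the original code): invoice strings become fixed-width character-level vectors and beer names become one-hot labels.

# Hypothetical preprocessing sketch -- names and parameters are assumptions, not the original code.
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(char_level=True)  # encode strings character by character
tokenizer.fit_on_texts(train_beer['invoice_text'])

# Fixed-width multi-hot vectors; their width is what the first Dense layer receives
train_encoded = tokenizer.texts_to_matrix(train_beer['invoice_text'], mode='binary')
test_encoded = tokenizer.texts_to_matrix(test_beer['invoice_text'], mode='binary')

# One-hot encode the beer names for categorical cross-entropy
codes, uniques = train_beer['product_name'].factorize()
train_labels = tf.keras.utils.to_categorical(codes, num_classes=len(uniques))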

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop

batch_size = 512  # Batch size for training.
epochs = 5  # Number of epochs to train for.

# Two hidden layers with dropout, followed by a softmax over all beer names
model = Sequential()
model.add(Dense(512, activation='relu'))
model.add(Dropout(rate=0.2, noise_shape=None, seed=None))
model.add(Dense(512, activation='relu'))
model.add(Dropout(rate=0.2, noise_shape=None, seed=None))
model.add(Dense(train_beer['product_name'].nunique(), activation='softmax'))

optimizer = RMSprop(learning_rate=0.001)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=optimizer, metrics=['accuracy'])
model.fit(train_encoded, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(test_encoded, test_labels))

Now I want to use the first two hidden layers as a pretrained base for another model, so I popped the final classification layer, added a new one, and recompiled the model. (Note that for testing purposes I re-added the same layer.)

# Drop the old softmax layer, attach a fresh one, then recompile
model.pop()
model.add(Dense(train_beer['product_name'].nunique(), activation='softmax'))
optimizer = RMSprop(learning_rate=0.001)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=optimizer, metrics=['accuracy'])

batch_size = 512  # Batch size for training.
epochs = 5  # Number of epochs to train for.

model.fit(train_encoded, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(test_encoded, test_labels))

But I get the following error:

Train on 313213 samples, validate on 16323 samples
Epoch 1/5
   512/313213 [..............................] - ETA: 29s
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-e341e0cd9a82> in <module>
      2 epochs = 5  # Number of epochs to train for. 
      3  
----> 4 model.fit(train_encoded, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(test_encoded,test_labels)) 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    726         max_queue_size=max_queue_size, 
    727         workers=workers, 
--> 728         use_multiprocessing=use_multiprocessing) 
    729  
    730   def evaluate(self, 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs) 
    322                 mode=ModeKeys.TRAIN, 
    323                 training_context=training_context, 
--> 324                 total_epochs=epochs) 
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN) 
    326  
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs) 
    121         step=step, mode=mode, size=current_batch_size) as batch_logs: 
    122       try: 
--> 123         batch_outs = execution_function(iterator) 
    124       except (StopIteration, errors.OutOfRangeError): 
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError? 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn) 
     84     # `numpy` translates Tensors to values in Eager mode. 
     85     return nest.map_structure(_non_none_constant_value, 
---> 86                               distributed_function(input_fn)) 
     87  
     88   return execution_function 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds) 
    455  
    456     tracing_count = self._get_tracing_count() 
--> 457     result = self._call(*args, **kwds) 
    458     if tracing_count == self._get_tracing_count(): 
    459       self._call_counter.called_without_tracing() 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds) 
    501       # This is the first call of __call__, so we have to initialize. 
    502       initializer_map = object_identity.ObjectIdentityDictionary() 
--> 503       self._initialize(args, kwds, add_initializers_to=initializer_map) 
    504     finally: 
    505       # At this point we know that the initialization is complete (or less 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to) 
    406     self._concrete_stateful_fn = ( 
    407         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 408             *args, **kwds)) 
    409  
    410     def invalid_creator_scope(*unused_args, **unused_kwds): 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   1846     if self.input_signature:
   1847       args, kwargs = None, None
-> 1848     graph_function, _, _ = self._maybe_define_function(args, kwargs)
   1849     return graph_function
   1850
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
   2148         graph_function = self._function_cache.primary.get(cache_key, None)
   2149         if graph_function is None:
-> 2150           graph_function = self._create_graph_function(args, kwargs)
   2151           self._function_cache.primary[cache_key] = graph_function
   2152         return graph_function, args, kwargs
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2039             arg_names=arg_names,
   2040             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2041             capture_by_value=self._capture_by_value),
   2042         self._function_attributes,
   2043         # Tell the ConcreteFunction to clean up its graph once it goes out of
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 
    913                                           converted_func) 
    914  
--> 915       func_outputs = python_func(*func_args, **func_kwargs) 
    916  
    917       # invariant: `func_outputs` contains only Tensors, CompositeTensors, 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds) 
    356         # __wrapped__ allows AutoGraph to swap in a converted function. We give 
    357         # the function a weak reference to itself to avoid a reference cycle. 
--> 358         return weak_wrapped_fn().__wrapped__(*args, **kwds) 
    359     weak_wrapped_fn = weakref.ref(wrapped_fn) 
    360  
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in distributed_function(input_iterator) 
     71     strategy = distribution_strategy_context.get_strategy() 
     72     outputs = strategy.experimental_run_v2( 
---> 73         per_replica_function, args=(model, x, y, sample_weights)) 
     74     # Out of PerReplica outputs reduce or pick values to return. 
     75     all_outputs = dist_utils.unwrap_output_dict( 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in experimental_run_v2(self, fn, args, kwargs) 
    758       fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(), 
    759                                 convert_by_default=False) 
--> 760       return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs) 
    761  
    762   def reduce(self, reduce_op, value, axis): 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
   1785       kwargs = {}
   1786     with self._container_strategy().scope():
-> 1787       return self._call_for_each_replica(fn, args, kwargs)
   1788
   1789   def _call_for_each_replica(self, fn, args, kwargs):
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
   2130         self._container_strategy(),
   2131         replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2132       return fn(*args, **kwargs)
   2133
   2134   def _reduce_to(self, reduce_op, value, destinations):
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs) 
    290   def wrapper(*args, **kwargs): 
    291     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED): 
--> 292       return func(*args, **kwargs) 
    293  
    294   if inspect.isfunction(func) or inspect.ismethod(func): 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics) 
    262       y, 
    263       sample_weights=sample_weights, 
--> 264       output_loss_metrics=model._output_loss_metrics) 
    265  
    266   if reset_metrics: 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics) 
    309           sample_weights=sample_weights, 
    310           training=True, 
--> 311           output_loss_metrics=output_loss_metrics)) 
    312   if not isinstance(outs, list): 
    313     outs = [outs] 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training) 
    250               output_loss_metrics=output_loss_metrics, 
    251               sample_weights=sample_weights, 
--> 252               training=training)) 
    253       if total_loss is None: 
    254         raise ValueError('The model cannot be run ' 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training) 
    125     inputs = nest.map_structure(ops.convert_to_tensor, inputs) 
    126  
--> 127   outs = model(inputs, **kwargs) 
    128   outs = nest.flatten(outs) 
    129  
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs) 
    845                     outputs = base_layer_utils.mark_as_return(outputs, acd) 
    846                 else: 
--> 847                   outputs = call_fn(cast_inputs, *args, **kwargs) 
    848  
    849             except errors.OperatorNotAllowedInGraphError as e: 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\sequential.py in call(self, inputs, training, mask) 
    268         kwargs['training'] = training 
    269  
--> 270       outputs = layer(inputs, **kwargs) 
    271  
    272       # `outputs` will be the inputs to the next layer. 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs) 
    810         # are casted, not before. 
    811         input_spec.assert_input_compatibility(self.input_spec, inputs, 
--> 812                                               self.name) 
    813         graph = backend.get_graph() 
    814         with graph.as_default(), backend.name_scope(self._name_scope()): 
 
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name) 
    211                 ' incompatible with the layer: expected axis ' + str(axis) + 
    212                 ' of input shape to have value ' + str(value) + 
--> 213                 ' but received input with shape ' + str(shape)) 
    214     # Check shape. 
    215     if spec.shape is not None: 
 
ValueError: Input 0 of layer dense_3 is incompatible with the layer: expected axis -1 of input shape to have value 6022 but received input with shape [None, 512] 

The following approach may help:

I can't really tell you why this happens (I would probably have to dig into the source code), but I suspect the layers are being rewired incorrectly somewhere. However, you can do the following to make it work:

new_model = Sequential()
# Copy every layer except the old softmax output into a fresh model
for l in model.layers[:-1]:
  new_model.add(l)
# Attach a new output layer
new_model.add(Dense(100, activation='softmax'))
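As a hedged follow-up sketch (not part of the original answer): for actual transfer learning you would typically freeze the copied layers, recompile the new model, and train only the new head. The `100` output units above are just a placeholder and must match the width of whatever labels you train against; `new_labels` below is a stand-in name, not a variable from the question.

# Optional follow-up sketch, assuming the variables from the question are in scope.
for layer in new_model.layers[:-1]:
    layer.trainable = False  # keep the pretrained weights fixed (optional)

new_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                  optimizer=RMSprop(learning_rate=0.001),
                  metrics=['accuracy'])
# new_labels must have as many columns as the new output layer has units (here, 100)
new_model.fit(train_encoded, new_labels, epochs=epochs, batch_size=batch_size)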