
Lasagne/Theano wrong number of dimensions

I dove into Lasagne and Theano with a modified mnist.py (Lasagne's primary example) to train a very simple XOR.

import numpy as np 

import theano 
import theano.tensor as T 

import time 

import lasagne 

X_train = [[[[0, 0], [0, 1], [1, 0], [1, 1]]]] # (1) 
y_train = [[[[1, 0], [0, 1], [0, 1], [1, 0]]]] 

# [0,  1,  1,  0] 

X_train = np.array(X_train).astype(np.uint8) 
y_train = np.array(y_train).astype(np.uint8) 


print X_train.shape 

X_val = X_train 
y_val = y_train 

X_test = X_train 
y_test = y_train 


def build_mlp(input_var=None): 
    # This creates an MLP with one hidden layer of 4 units, followed by 
    # a softmax output layer of 2 units. (The dropout layer from the 
    # original mnist.py example is commented out below.) 

    # Input layer, specifying the expected input shape of the network 
    # (unspecified batchsize, 1 channel, 4 rows and 2 columns) and 
    # linking it to the given Theano variable `input_var`, if any: 
    l_in = lasagne.layers.InputLayer(shape=(None, 1, 4, 2), # (2) 
            input_var=input_var) 

    # Apply 20% dropout to the input data: 
    # l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2) 

    # Add a fully-connected layer of 4 units, using the linear rectifier, and 
    # initializing weights with Glorot's scheme (which is the default anyway): 
    l_hid1 = lasagne.layers.DenseLayer(
      l_in, num_units=4, 
      nonlinearity=lasagne.nonlinearities.rectify, 
      W=lasagne.init.GlorotUniform()) 

    # Finally, we'll add the fully-connected output layer, of 2 softmax units: 
    l_out = lasagne.layers.DenseLayer(
      l_hid1, num_units=2, 
      nonlinearity=lasagne.nonlinearities.softmax) 

    # Each layer is linked to its incoming layer(s), so we only need to pass 
    # the output layer to give access to a network in Lasagne: 
    return l_out 

# Prepare Theano variables for inputs and targets 
input_var = T.tensor4('inputs') 
target_var = T.ivector('targets') 

network = build_mlp(input_var) 

# Create a loss expression for training, i.e., a scalar objective we want 
# to minimize (for our multi-class problem, it is the cross-entropy loss): 
prediction = lasagne.layers.get_output(network) 
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) 
loss = loss.mean() 
# We could add some weight decay as well here, see lasagne.regularization. 

# Create update expressions for training, i.e., how to modify the 
# parameters at each training step. Here, we'll use Stochastic Gradient 
# Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more. 
params = lasagne.layers.get_all_params(network, trainable=True) 
updates = lasagne.updates.nesterov_momentum(
     loss, params, learning_rate=0.01, momentum=0.9) 

# Create a loss expression for validation/testing. The crucial difference 
# here is that we do a deterministic forward pass through the network, 
# disabling dropout layers. 
test_prediction = lasagne.layers.get_output(network, deterministic=True) 
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, 
                 target_var) 
test_loss = test_loss.mean() 
# As a bonus, also create an expression for the classification accuracy: 
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), 
        dtype=theano.config.floatX) 

# Compile a function performing a training step on a mini-batch (by giving 
# the updates dictionary) and returning the corresponding training loss: 
train_fn = theano.function([input_var, target_var], loss, updates=updates) 

# Compile a second function computing the validation loss and accuracy: 
val_fn = theano.function([input_var, target_var], [test_loss, test_acc]) 


# ############################# Batch iterator ############################### 
# This is just a simple helper function iterating over training data in 
# mini-batches of a particular size, optionally in random order. It assumes 
# data is available as numpy arrays. For big datasets, you could load numpy 
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your 
# own custom data iteration function. For small datasets, you can also copy 
# them to GPU at once for slightly improved performance. This would involve 
# several changes in the main program, though, and is not demonstrated here. 

def iterate_minibatches(inputs, targets, batchsize, shuffle=False): 
    assert len(inputs) == len(targets) 
    if shuffle: 
        indices = np.arange(len(inputs)) 
        np.random.shuffle(indices) 

    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize): 
        if shuffle: 
            excerpt = indices[start_idx:start_idx + batchsize] 
        else: 
            excerpt = slice(start_idx, start_idx + batchsize) 
        yield inputs[excerpt], targets[excerpt] 
    else: 
        if shuffle: 
            excerpt = indices[0:len(inputs)] 
        else: 
            excerpt = slice(0, len(inputs)) 
        yield inputs[excerpt], targets[excerpt] 


num_epochs = 4 

# Finally, launch the training loop. 
print("Starting training...") 
# We iterate over epochs: 
for epoch in range(num_epochs): 
    # In each epoch, we do a full pass over the training data: 
    train_err = 0 
    train_batches = 0 
    start_time = time.time() 
    for batch in iterate_minibatches(X_train, y_train, 4, shuffle=True): 
        inputs, targets = batch 
        print inputs.shape, targets.shape, input_var.shape, input_var.ndim, inputs.ndim 
        train_err += train_fn(inputs, targets) # (3) 
        train_batches += 1 

    # And a full pass over the validation data: 
    val_err = 0 
    val_acc = 0 
    val_batches = 0 
    for batch in iterate_minibatches(X_val, y_val, 4, shuffle=False): 
        inputs, targets = batch 
        err, acc = val_fn(inputs, targets) 
        val_err += err 
        val_acc += acc 
        val_batches += 1 

    # Then we print the results for this epoch: 
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time)) 
    print(" training loss:\t\t{:.6f}".format(train_err/train_batches)) 
    print(" validation loss:\t\t{:.6f}".format(val_err/val_batches)) 
    print(" validation accuracy:\t\t{:.2f} %".format(
        val_acc/val_batches * 100)) 

# After training, we compute and print the test error: 
test_err = 0 
test_acc = 0 
test_batches = 0 
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False): 
    inputs, targets = batch 
    err, acc = val_fn(inputs, targets) 
    test_err += err 
    test_acc += acc 
    test_batches += 1 
print("Final results:") 
print(" test loss:\t\t\t{:.6f}".format(test_err/test_batches)) 
print(" test accuracy:\t\t{:.2f} %".format(
    test_acc/test_batches * 100)) 

# Optionally, you could now dump the network weights to a file like this: 
# np.savez('model.npz', lasagne.layers.get_all_param_values(network)) 

I defined the training set at (1), modified the input layer to the new dimensions at (2), and get an exception at (3):

Traceback (most recent call last): 
    File "test.py", line 139, in <module> 
    train_err += train_fn(inputs, targets) 
    File "/usr/local/lib/python2.7/site-packages/theano/compile/function_module.py", line 513, in __call__ 
    allow_downcast=s.allow_downcast) 
    File "/usr/local/lib/python2.7/site-packages/theano/tensor/type.py", line 169, in filter 
    data.shape)) 
TypeError: ('Bad input argument to theano function with name "test.py:91" at index 1(0-based)', 'Wrong number of dimensions: expected 1, got 4 with shape (1, 1, 4, 2).') 

And I have no idea what I did wrong. When I print the shapes (the program's output before the exception), I get this:

(1, 1, 4, 2) 
Starting training... 
(1, 1, 4, 2) (1, 1, 4, 2) Shape.0 4 4 

Which seems fine to me. What am I doing wrong, and how should the array be shaped for this to work?

Answer


The problem is with the second input, targets. Note that the error message points at it by saying "... at index 1 (0-based) ...", i.e. the second parameter.

target_var is an ivector, but you are providing a 4-dimensional tensor for targets. The solution is to change your y_train dataset so that it is 1-dimensional:

y_train = [0, 1, 1, 0] 
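
As a minimal sketch (assuming the same numpy conversion style as the rest of the script; int32 is used because T.ivector is a 32-bit integer vector, which avoids any dtype mismatch):

y_train = np.array([0, 1, 1, 0]).astype(np.int32) # one class label per XOR example 
y_val = y_train 
y_test = y_train 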

This will trigger another error, because you currently assert that the first dimension of inputs and targets must match, but changing

assert len(inputs) == len(targets) 

to

assert inputs.shape[2] == len(targets) 

will fix the second problem and allow the script to run successfully.
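
To see why axis 2 is the right one to check: X_train has shape (1, 1, 4, 2), so the four XOR examples sit on axis 2, while the batch axis (axis 0) has length 1. A minimal sketch of that check, using the shapes from the question:

import numpy as np 

X_train = np.array([[[[0, 0], [0, 1], [1, 0], [1, 1]]]]).astype(np.uint8) 
y_train = np.array([0, 1, 1, 0]).astype(np.int32) 

print len(X_train)   # 1 -> what the old assert compared against len(y_train) 
print X_train.shape[2]  # 4 -> matches len(y_train), so the new assert passes 
assert X_train.shape[2] == len(y_train) 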
