Ускорение функции с помощью Cython

Я пытаюсь ускорить выполнение одной из моих функций:

def get_scale_local_maximas(cube_coordinates, laplacian_cube):
    """
    Check provided cube coordinates for scale space local maxima.

    Returns only the points that satisfy the criteria.

    A point is considered to be a local maximum if its value is strictly
    greater than the value of the point on the next scale level and the
    point on the previous scale level. If the tested point is located on
    the first scale level or on the last one, then only one inequality
    has to hold for this point to be a local scale maximum.

    Parameters
    ----------
    cube_coordinates : (n, 3) ndarray
        A 2d array with each row representing 3 values, ``(y, x, scale_level)``
        where ``(y, x)`` are coordinates of the blob and ``scale_level`` is
        the position of a point in scale space.
    laplacian_cube : ndarray of floats
        Laplacian of Gaussian scale space.

    Returns
    -------
    output : (n, 3) ndarray
        cube_coordinates that satisfy the local maximum criteria in
        scale space.

    Examples
    --------
    >>> one = np.array([[1, 2, 3], [4, 5, 6]])
    >>> two = np.array([[7, 8, 9], [10, 11, 12]])
    >>> three = np.array([[0, 0, 0], [0, 0, 0]])
    >>> check_coords = np.array([[1, 0, 1], [1, 0, 0], [1, 0, 2]])
    >>> lapl_dummy = np.dstack([one, two, three])
    >>> get_scale_local_maximas(check_coords, lapl_dummy)
    array([[1, 0, 1]])
    """
    amount_of_layers = laplacian_cube.shape[2]
    amount_of_points = cube_coordinates.shape[0]

    # Preallocate the acceptance mask. Every point starts out accepted
    # (True) and is cleared as soon as one neighbour comparison fails.
    accepted_points_index = np.ones(amount_of_points, dtype=bool)

    for point_index, interest_point_coords in enumerate(cube_coordinates):
        # Row coordinate
        y_coord = interest_point_coords[0]
        # Column coordinate
        x_coord = interest_point_coords[1]
        # Layer number starting from the smallest sigma
        point_layer = interest_point_coords[2]
        point_response = laplacian_cube[y_coord, x_coord, point_layer]

        # Check the point under the current one (absent on the first layer)
        if point_layer != 0:
            lower_point_response = laplacian_cube[
                y_coord, x_coord, point_layer - 1
            ]
            if lower_point_response >= point_response:
                accepted_points_index[point_index] = False
                # Already rejected: the upper neighbour need not be read.
                continue

        # Check the point above the current one (absent on the last layer)
        if point_layer != (amount_of_layers - 1):
            upper_point_response = laplacian_cube[
                y_coord, x_coord, point_layer + 1
            ]
            if upper_point_response >= point_response:
                accepted_points_index[point_index] = False

    # Return only accepted points
    return cube_coordinates[accepted_points_index]

Это моя попытка ускорить её с помощью Cython:

# cython: cdivision=True 
# cython: boundscheck=False 
# cython: nonecheck=False 
# cython: wraparound=False 
import numpy as np 
cimport numpy as cnp 

def get_scale_local_maximas(cube_coordinates, cnp.ndarray[cnp.double_t, ndim=3] laplacian_cube): 
""" 
Check provided cube coordinates for scale space local maxima. 
Returns only the points that satisfy the criteria. 

A point is considered to be a local maximum if its value is strictly 
greater than the value of the point on the next scale level and the 
point on the previous scale level. If the tested point is located on 
the first scale level or on the last one, then only one inequality 
has to hold for this point to be a local scale maximum. 

Parameters 
---------- 
cube_coordinates : (n, 3) ndarray 
     A 2d array with each row representing 3 values, ``(y,x,scale_level)`` 
     where ``(y,x)`` are coordinates of the blob and ``scale_level`` is the 
     position of a point in scale space. Not given a C type in the 
     signature, so it is handled as a generic Python object. 
laplacian_cube : ndarray of floats 
    Laplacian of Gaussian scale space. Declared as a 3-d buffer of 
    doubles. 

Returns 
------- 
output : (n, 3) ndarray 
    cube_coordinates that satisfy the local maximum criteria in 
    scale space. 

Examples 
-------- 
>>> one = np.array([[1, 2, 3], [4, 5, 6]]) 
>>> two = np.array([[7, 8, 9], [10, 11, 12]]) 
>>> three = np.array([[0, 0, 0], [0, 0, 0]]) 
>>> check_coords = np.array([[1, 0, 1], [1, 0, 0], [1, 0, 2]]) 
>>> lapl_dummy = np.dstack([one, two, three]) 
>>> get_scale_local_maximas(check_coords, lapl_dummy) 
array([[1, 0, 1]]) 
""" 

cdef Py_ssize_t y_coord, x_coord, point_layer, point_index 
cdef cnp.double_t point_response, lower_point_response, upper_point_response 
cdef Py_ssize_t amount_of_layers = laplacian_cube.shape[2] 
cdef Py_ssize_t amount_of_points = cube_coordinates.shape[0] 

# Preallocate the acceptance mask. Every point starts out accepted (True) 
# and is cleared as soon as one neighbour comparison fails. 
# NOTE: the mask has no cdef declaration, so it is a Python object and the 
# item assignments in the loop below are Python-level operations. 
accepted_points_index = np.ones(amount_of_points, dtype=bool) 

for point_index in range(amount_of_points): 

    # NOTE: cube_coordinates is untyped in the signature, so this row lookup 
    # and the three element reads that follow are Python-object operations 
    # whose results are then converted to Py_ssize_t. 
    interest_point_coords = cube_coordinates[point_index] 
    # Row coordinate 
    y_coord = interest_point_coords[0] 
    # Column coordinate 
    x_coord = interest_point_coords[1] 
    # Layer number starting from the smallest sigma 
    point_layer = interest_point_coords[2] 
    point_response = laplacian_cube[y_coord, x_coord, point_layer] 

    # Check the point under the current one (absent on the first layer) 
    if point_layer != 0: 
     lower_point_response = laplacian_cube[y_coord, x_coord, point_layer-1] 
     if lower_point_response >= point_response: 
      accepted_points_index[point_index] = False 
      continue 

    # Check the point above the current one (absent on the last layer) 
    if point_layer != (amount_of_layers-1): 
     upper_point_response = laplacian_cube[y_coord, x_coord, point_layer+1] 
     if upper_point_response >= point_response: 
      accepted_points_index[point_index] = False 
      continue 

# Return only accepted points 
return cube_coordinates[accepted_points_index]

Но я не вижу никакого выигрыша в скорости. А также я попытался заменить cnp.ndarray[cnp.double_t, ndim=3] на memoryview cnp.double_t[:, :, ::1], но это только замедлило весь код. Я буду признателен за любые подсказки или исправления в моем коде. Я относительно новичок в Cython, и я, возможно, сделал что-то не так.

Edit:

Я полностью переписал свою функцию в Cython:

def get_scale_local_maximas(cnp.ndarray[cnp.int_t, ndim=2] cube_coordinates, cnp.ndarray[cnp.double_t, ndim=3] laplacian_cube): 
""" 
Check provided cube coordinates for scale space local maxima. 
Returns only the points that satisfy the criteria. 

A point is considered to be a local maximum if its value is strictly 
greater than the value of the point on the next scale level and the 
point on the previous scale level. If the tested point is located on 
the first scale level or on the last one, then only one inequality 
has to hold for this point to be a local scale maximum. 

Parameters 
---------- 
cube_coordinates : (n, 3) ndarray 
     A 2d array with each row representing 3 values, ``(y,x,scale_level)`` 
     where ``(y,x)`` are coordinates of the blob and ``scale_level`` is the 
     position of a point in scale space. Declared as a 2-d buffer of 
     ``cnp.int_t``. 
laplacian_cube : ndarray of floats 
    Laplacian of Gaussian scale space. Declared as a 3-d buffer of 
    doubles. 

Returns 
------- 
output : (n, 3) ndarray 
    cube_coordinates that satisfy the local maximum criteria in 
    scale space. 

Examples 
-------- 
>>> one = np.array([[1, 2, 3], [4, 5, 6]]) 
>>> two = np.array([[7, 8, 9], [10, 11, 12]]) 
>>> three = np.array([[0, 0, 0], [0, 0, 0]]) 
>>> check_coords = np.array([[1, 0, 1], [1, 0, 0], [1, 0, 2]]) 
>>> lapl_dummy = np.dstack([one, two, three]) 
>>> get_scale_local_maximas(check_coords, lapl_dummy) 
array([[1, 0, 1]]) 
""" 

cdef Py_ssize_t y_coord, x_coord, point_layer, point_index 
cdef cnp.double_t point_response, lower_point_response, upper_point_response 
cdef Py_ssize_t amount_of_layers = laplacian_cube.shape[2] 
cdef Py_ssize_t amount_of_points = cube_coordinates.shape[0] 

# Preallocate the acceptance mask. Every point starts out accepted (True) 
# and is cleared as soon as one neighbour comparison fails. 
# NOTE: the mask has no cdef declaration, so it is a Python object and the 
# item assignments in the loop below are Python-level operations. 
accepted_points_index = np.ones(amount_of_points, dtype=bool) 

for point_index in range(amount_of_points): 

    # NOTE(review): cube_coordinates is declared with ndim=2 but is indexed 
    # here with a single index, and interest_point_coords has no cdef 
    # declaration. Presumably this row lookup therefore still goes through 
    # generic Python indexing on every iteration -- confirm with `cython -a`. 
    interest_point_coords = cube_coordinates[point_index] 
    # Row coordinate 
    y_coord = interest_point_coords[0] 
    # Column coordinate 
    x_coord = interest_point_coords[1] 
    # Layer number starting from the smallest sigma 
    point_layer = interest_point_coords[2] 
    point_response = laplacian_cube[y_coord, x_coord, point_layer] 

    # Check the point under the current one (absent on the first layer) 
    if point_layer != 0: 
     lower_point_response = laplacian_cube[y_coord, x_coord, point_layer-1] 
     if lower_point_response >= point_response: 
      accepted_points_index[point_index] = False 
      continue 

    # Check the point above the current one (absent on the last layer) 
    if point_layer != (amount_of_layers-1): 
     upper_point_response = laplacian_cube[y_coord, x_coord, point_layer+1] 
     if upper_point_response >= point_response: 
      accepted_points_index[point_index] = False 
      continue 

# Return only accepted points 
return cube_coordinates[accepted_points_index]

После этого я провёл несколько тестов своей функции и предложенной векторизованной функции:

%timeit compiled.get_scale_local_maximas_np(coords, lapl_dummy) 
%timeit compiled.get_scale_local_maximas(coords, lapl_dummy) 

%timeit dynamic.get_scale_local_maximas_np(coords, lapl_dummy) 
%timeit dynamic.get_scale_local_maximas(coords, lapl_dummy) 

10000 loops, best of 3: 101 µs per loop 
1000 loops, best of 3: 328 µs per loop 
10000 loops, best of 3: 103 µs per loop 
1000 loops, best of 3: 1.6 ms per loop

Пространство имен compiled представляет эти две функции, скомпилированные с использованием Cython.

Пространство имен dynamic представляет собой обычный файл Python.

Итак, я сделал вывод, что в этом случае метод numpy лучше.

источник

2015-03-19 warmspringwinds

Вы уже делаете 98% работы в NumPy. Cython не может сделать больше. –

Вы пробовали использовать magic-команду Cython в блокноте IPython? Она выделяет жёлтым те строки, на которые уходит большая часть времени вычислений. Вы также можете разворачивать и сворачивать эти строки, чтобы увидеть C-код, сгенерированный Cython. Удачи! – denfromufa

Для этого вам может не понадобиться Cython, но если он всё же нужен, попробуйте скомпилировать свой код с помощью `cython -a`. Созданный Cython HTML-файл поможет вам понять, что именно делает Cython. Строки, которые можно напрямую перевести на C, остаются белыми, а строки, требующие больших накладных расходов, выделяются разными оттенками жёлтого. Ваша цель состоит в том, чтобы все важные циклы были белыми. –

Ваш код на Python всё ещё можно улучшить, так как вы ещё не «выполняете 98% работы в numpy»: вы по-прежнему итерируете по строкам массива координат и выполняете 1–2 проверки на каждую строку.

Вы можете использовать «прихотливую индексацию» (fancy indexing) NumPy и маски, чтобы получить полностью векторизованную форму:

def get_scale_local_maximas_full_np(coords, cube):
    """
    Vectorized filter for scale space local maxima.

    A row of ``coords`` is kept when the value it addresses in ``cube`` is
    strictly greater than both of its neighbours along the last axis.
    Rows on the first layer are only compared with the layer above them,
    rows on the last layer only with the layer below them.

    Parameters
    ----------
    coords : (n, 3) ndarray of ints
        Each row holds the indices into axes 0, 1 and 2 of ``cube``,
        in that order.
    cube : 3-d ndarray
        Values to compare along the last axis.

    Returns
    -------
    output : (m, 3) ndarray
        The rows of ``coords`` that pass both neighbour comparisons.

    Examples
    --------
    >>> one = np.array([[1, 2, 3], [4, 5, 6]])
    >>> two = np.array([[7, 8, 9], [10, 11, 12]])
    >>> three = np.array([[0, 0, 0], [0, 0, 0]])
    >>> check_coords = np.array([[1, 0, 1], [1, 0, 0], [1, 0, 2]])
    >>> lapl_dummy = np.dstack([one, two, three])
    >>> get_scale_local_maximas_full_np(check_coords, lapl_dummy)
    array([[1, 0, 1]])
    """
    rows, cols, layers = coords[:, 0], coords[:, 1], coords[:, 2]
    point_responses = cube[rows, cols, layers]

    # Points that actually have a neighbour below / above them.
    has_lower = layers > 0
    has_upper = layers < (cube.shape[2] - 1)

    lower_responses = cube[
        rows[has_lower], cols[has_lower], layers[has_lower] - 1
    ]
    upper_responses = cube[
        rows[has_upper], cols[has_upper], layers[has_upper] + 1
    ]

    # Start with every point accepted; a missing neighbour counts as passed.
    # The builtin ``bool`` is used because the ``np.bool`` alias is not
    # available in every NumPy release.
    is_maximum = np.ones(layers.shape, dtype=bool)
    is_maximum[has_lower] = point_responses[has_lower] > lower_responses
    is_maximum[has_upper] &= point_responses[has_upper] > upper_responses

    return coords[is_maximum]

Я сгенерировал несколько больший набор данных для тестирования производительности:

# Benchmark fixture: a random 100x100x100 response cube and 1000 random
# (row, col, layer) index triples that address it.
lapl_dummy = np.random.rand(100, 100, 100)
# randint's upper bound is exclusive, so 100 yields indices in 0..99; it
# replaces the deprecated np.random.random_integers(0, 99, ...).
coords = np.random.randint(0, 100, size=(1000, 3))

Я получаю следующие результаты замеров времени:

In [146]: %timeit get_scale_local_maximas_full_np(coords, lapl_dummy) 
10000 loops, best of 3: 175 µs per loop 

In [147]: %timeit get_scale_local_maximas(coords, lapl_dummy) 
100 loops, best of 3: 2.24 ms per loop

Но, конечно, будьте осторожны с тестами производительности, потому что это часто зависит от используемых данных.

У меня мало опыта работы с Cython, поэтому с ним я помочь не могу.

источник

2015-03-19 23:23:58

Спасибо вам за помощь :) – warmspringwinds

Функция ускорения с использованием cython

ответ

Смежные вопросы