Я пытаюсь ускорить выполнение одной из моих функций.Функция ускорения с использованием cython
def get_scale_local_maximas(cube_coordinates, laplacian_cube):
"""
Check provided cube coordinate for scale space local maximas.
Returns only the points that satisfy the criteria.
A point is considered to be a local maxima if its value is greater
than the value of the point on the next scale level and the point
on the previous scale level. If the tested point is located on the
first scale level or on the last one, then only one inequality should
hold in order for this point to be local scale maxima.
Parameters
----------
cube_coordinates : (n, 3) ndarray
A 2d array with each row representing 3 values, ``(y,x,scale_level)``
where ``(y,x)`` are coordinates of the blob and ``scale_level`` is the
position of a point in scale space.
laplacian_cube : ndarray of floats
Laplacian of Gaussian scale space.
Returns
-------
output : (n, 3) ndarray
cube_coordinates that satisfy the local maximum criteria in
scale space.
Examples
--------
>>> one = np.array([[1, 2, 3], [4, 5, 6]])
>>> two = np.array([[7, 8, 9], [10, 11, 12]])
>>> three = np.array([[0, 0, 0], [0, 0, 0]])
>>> check_coords = np.array([[1, 0, 1], [1, 0, 0], [1, 0, 2]])
>>> lapl_dummy = np.dstack([one, two, three])
>>> get_scale_local_maximas(check_coords, lapl_dummy)
array([[1, 0, 1]])
"""
amount_of_layers = laplacian_cube.shape[2]
amount_of_points = cube_coordinates.shape[0]
# Preallocate index. Fill it with False.
accepted_points_index = np.ones(amount_of_points, dtype=bool)
for point_index, interest_point_coords in enumerate(cube_coordinates):
# Row coordinate
y_coord = interest_point_coords[0]
# Column coordinate
x_coord = interest_point_coords[1]
# Layer number starting from the smallest sigma
point_layer = interest_point_coords[2]
point_response = laplacian_cube[y_coord, x_coord, point_layer]
# Check the point under the current one
if point_layer != 0:
lower_point_response = laplacian_cube[y_coord, x_coord, point_layer-1]
if lower_point_response >= point_response:
accepted_points_index[point_index] = False
continue
# Check the point above the current one
if point_layer != (amount_of_layers-1):
upper_point_response = laplacian_cube[y_coord, x_coord, point_layer+1]
if upper_point_response >= point_response:
accepted_points_index[point_index] = False
continue
# Return only accepted points
return cube_coordinates[accepted_points_index]
Это моя попытка ускорить его помощью Cython:
# cython: cdivision=True
# cython: boundscheck=False
# cython: nonecheck=False
# cython: wraparound=False
import numpy as np
cimport numpy as cnp
def get_scale_local_maximas(cube_coordinates, cnp.ndarray[cnp.double_t, ndim=3] laplacian_cube):
"""
Check provided cube coordinate for scale space local maximas.
Returns only the points that satisfy the criteria.
A point is considered to be a local maxima if its value is greater
than the value of the point on the next scale level and the point
on the previous scale level. If the tested point is located on the
first scale level or on the last one, then only one inequality should
hold in order for this point to be local scale maxima.
Parameters
----------
cube_coordinates : (n, 3) ndarray
A 2d array with each row representing 3 values, ``(y,x,scale_level)``
where ``(y,x)`` are coordinates of the blob and ``scale_level`` is the
position of a point in scale space.
laplacian_cube : ndarray of floats
Laplacian of Gaussian scale space.
Returns
-------
output : (n, 3) ndarray
cube_coordinates that satisfy the local maximum criteria in
scale space.
Examples
--------
>>> one = np.array([[1, 2, 3], [4, 5, 6]])
>>> two = np.array([[7, 8, 9], [10, 11, 12]])
>>> three = np.array([[0, 0, 0], [0, 0, 0]])
>>> check_coords = np.array([[1, 0, 1], [1, 0, 0], [1, 0, 2]])
>>> lapl_dummy = np.dstack([one, two, three])
>>> get_scale_local_maximas(check_coords, lapl_dummy)
array([[1, 0, 1]])
"""
cdef Py_ssize_t y_coord, x_coord, point_layer, point_index
cdef cnp.double_t point_response, lower_point_response, upper_point_response
cdef Py_ssize_t amount_of_layers = laplacian_cube.shape[2]
cdef Py_ssize_t amount_of_points = cube_coordinates.shape[0]
# amount_of_layers = laplacian_cube.shape[2]
# amount_of_points = cube_coordinates.shape[0]
# Preallocate index. Fill it with False.
accepted_points_index = np.ones(amount_of_points, dtype=bool)
for point_index in range(amount_of_points):
interest_point_coords = cube_coordinates[point_index]
# Row coordinate
y_coord = interest_point_coords[0]
# Column coordinate
x_coord = interest_point_coords[1]
# Layer number starting from the smallest sigma
point_layer = interest_point_coords[2]
point_response = laplacian_cube[y_coord, x_coord, point_layer]
# Check the point under the current one
if point_layer != 0:
lower_point_response = laplacian_cube[y_coord, x_coord, point_layer-1]
if lower_point_response >= point_response:
accepted_points_index[point_index] = False
continue
# Check the point above the current one
if point_layer != (amount_of_layers-1):
upper_point_response = laplacian_cube[y_coord, x_coord, point_layer+1]
if upper_point_response >= point_response:
accepted_points_index[point_index] = False
continue
# Return only accepted points
return cube_coordinates[accepted_points_index]
Но я не вижу никакого выигрыша в скорости. А также я попытался заменить cnp.ndarray[cnp.double_t, ndim=3]
на memoryview cnp.double_t[:, :, ::1]
, но это только замедлило весь код. Я буду признателен за любые подсказки или исправления в моем коде. Я относительно новичок в Cython, и я, возможно, сделал что-то не так.
Edit:
Я полностью переписал свою функцию в Cython:
def get_scale_local_maximas(cnp.ndarray[cnp.int_t, ndim=2] cube_coordinates, cnp.ndarray[cnp.double_t, ndim=3] laplacian_cube):
"""
Check provided cube coordinate for scale space local maximas.
Returns only the points that satisfy the criteria.
A point is considered to be a local maxima if its value is greater
than the value of the point on the next scale level and the point
on the previous scale level. If the tested point is located on the
first scale level or on the last one, then only one inequality should
hold in order for this point to be local scale maxima.
Parameters
----------
cube_coordinates : (n, 3) ndarray
A 2d array with each row representing 3 values, ``(y,x,scale_level)``
where ``(y,x)`` are coordinates of the blob and ``scale_level`` is the
position of a point in scale space.
laplacian_cube : ndarray of floats
Laplacian of Gaussian scale space.
Returns
-------
output : (n, 3) ndarray
cube_coordinates that satisfy the local maximum criteria in
scale space.
Examples
--------
>>> one = np.array([[1, 2, 3], [4, 5, 6]])
>>> two = np.array([[7, 8, 9], [10, 11, 12]])
>>> three = np.array([[0, 0, 0], [0, 0, 0]])
>>> check_coords = np.array([[1, 0, 1], [1, 0, 0], [1, 0, 2]])
>>> lapl_dummy = np.dstack([one, two, three])
>>> get_scale_local_maximas(check_coords, lapl_dummy)
array([[1, 0, 1]])
"""
cdef Py_ssize_t y_coord, x_coord, point_layer, point_index
cdef cnp.double_t point_response, lower_point_response, upper_point_response
cdef Py_ssize_t amount_of_layers = laplacian_cube.shape[2]
cdef Py_ssize_t amount_of_points = cube_coordinates.shape[0]
# Preallocate index. Fill it with False.
accepted_points_index = np.ones(amount_of_points, dtype=bool)
for point_index in range(amount_of_points):
interest_point_coords = cube_coordinates[point_index]
# Row coordinate
y_coord = interest_point_coords[0]
# Column coordinate
x_coord = interest_point_coords[1]
# Layer number starting from the smallest sigma
point_layer = interest_point_coords[2]
point_response = laplacian_cube[y_coord, x_coord, point_layer]
# Check the point under the current one
if point_layer != 0:
lower_point_response = laplacian_cube[y_coord, x_coord, point_layer-1]
if lower_point_response >= point_response:
accepted_points_index[point_index] = False
continue
# Check the point above the current one
if point_layer != (amount_of_layers-1):
upper_point_response = laplacian_cube[y_coord, x_coord, point_layer+1]
if upper_point_response >= point_response:
accepted_points_index[point_index] = False
continue
# Return only accepted points
return cube_coordinates[accepted_points_index]
И после того, что я сделал несколько тестов с моей функцией и с предложенной функцией, которая была векторизованными:
%timeit compiled.get_scale_local_maximas_np(coords, lapl_dummy)
%timeit compiled.get_scale_local_maximas(coords, lapl_dummy)
%timeit dynamic.get_scale_local_maximas_np(coords, lapl_dummy)
%timeit dynamic.get_scale_local_maximas(coords, lapl_dummy)
10000 loops, best of 3: 101 µs per loop
1000 loops, best of 3: 328 µs per loop
10000 loops, best of 3: 103 µs per loop
1000 loops, best of 3: 1.6 ms per loop
Пространство имен compiled
представляет эти две функции, скомпилированные с использованием Cython.
Пространство имен dynamic
представляет собой обычный файл Python.
Итак, я сделал вывод, что в этом случае метод numpy лучше.
Вы уже делаете 98% работы в NumPy. Китон не может сделать больше. –
Вы пытались использовать магию cython из ноутбука ipython? он будет выделять с желтыми линиями, где большая часть расчёта расходуется. вы также можете развернуть и свернуть эти строки, чтобы увидеть c-код, сгенерированный cython. удачи! – denfromufa
Для этого вам может не понадобиться cython, но если вы это сделаете, попробуйте создать свой код с помощью 'cython -a'. Созданный html cython поможет вам понять, что делает cython. Линии, которые могут быть непосредственно переведены на C, остаются белыми, а строки, требующие большого количества накладных расходов, выделяются в разных оттенках желтого. Ваша цель состоит в том, чтобы все важные петли были белыми. –