diff --git a/fibonacci.pxd b/fibonacci.pxd new file mode 100644 index 0000000000000..7d89a1b077f37 --- /dev/null +++ b/fibonacci.pxd @@ -0,0 +1,60 @@ +""" +The definition of the Fibonacci heap data structure, which provide a fast +find-minimum operation needed for a number of algorithms such as Dijkstra's +algorithm for shortest graph path searches. +""" + +cimport numpy as np +ctypedef np.float64_t DTYPE_t + + +###################################################################### +# FibonacciNode structure +# This structure and the operations on it are the nodes of the +# Fibonacci heap. +# + +cdef struct FibonacciNode: + unsigned int index, rank, state + DTYPE_t val + FibonacciNode *parent + FibonacciNode *left_sibling + FibonacciNode *right_sibling + FibonacciNode *children + +ctypedef FibonacciNode* pFibonacciNode + +cdef void initialize_node(FibonacciNode* node, + unsigned int index, + DTYPE_t val=*) + +cdef FibonacciNode* rightmost_sibling(FibonacciNode* node) + +cdef FibonacciNode* leftmost_sibling(FibonacciNode* node) + +cdef void add_child(FibonacciNode* node, FibonacciNode* new_child) + +cdef void add_sibling(FibonacciNode* node, FibonacciNode* new_sibling) + +cdef void remove(FibonacciNode* node) + + +###################################################################### +# FibonacciHeap structure +# This structure and operations on it use the FibonacciNode +# routines to implement a Fibonacci heap + +cdef struct FibonacciHeap: + FibonacciNode* min_node + pFibonacciNode[100] roots_by_rank # maximum number of nodes is ~2^100. + +cdef void insert_node(FibonacciHeap* heap, + FibonacciNode* node) + +cdef void decrease_val(FibonacciHeap* heap, + FibonacciNode* node, + DTYPE_t newval) + +cdef void link(FibonacciHeap* heap, FibonacciNode* node) + +cdef FibonacciNode* remove_min(FibonacciHeap* heap) diff --git a/fibonacci.pyx b/fibonacci.pyx new file mode 100644 index 0000000000000..bc9ae68fd3879 --- /dev/null +++ b/fibonacci.pyx @@ -0,0 +1,194 @@ +""" +The definition of the Fibonacci heap data structure, which provide a fast +find-minimum operation needed for a number of algorithms such as Dijkstra's +algorithm for shortest graph path searches. +""" + +from fibonacci cimport DTYPE_t + +cdef void initialize_node(FibonacciNode* node, + unsigned int index, + DTYPE_t val=0): + # Assumptions: - node is a valid pointer + # - node is not currently part of a heap + node.index = index + node.val = val + node.rank = 0 + node.state = 0 # 0 -> NOT_IN_HEAP + + node.parent = NULL + node.left_sibling = NULL + node.right_sibling = NULL + node.children = NULL + + +cdef FibonacciNode* rightmost_sibling(FibonacciNode* node): + # Assumptions: - node is a valid pointer + cdef FibonacciNode* temp = node + while(temp.right_sibling): + temp = temp.right_sibling + return temp + + +cdef FibonacciNode* leftmost_sibling(FibonacciNode* node): + # Assumptions: - node is a valid pointer + cdef FibonacciNode* temp = node + while(temp.left_sibling): + temp = temp.left_sibling + return temp + + +cdef void add_child(FibonacciNode* node, FibonacciNode* new_child): + # Assumptions: - node is a valid pointer + # - new_child is a valid pointer + # - new_child is not the sibling or child of another node + new_child.parent = node + + if node.children: + add_sibling(node.children, new_child) + else: + node.children = new_child + new_child.right_sibling = NULL + new_child.left_sibling = NULL + node.rank = 1 + + +cdef void add_sibling(FibonacciNode* node, FibonacciNode* new_sibling): + # Assumptions: - node is a valid pointer + # - new_sibling is a valid pointer + # - new_sibling is not the child or sibling of another node + cdef FibonacciNode* temp = rightmost_sibling(node) + temp.right_sibling = new_sibling + new_sibling.left_sibling = temp + new_sibling.right_sibling = NULL + new_sibling.parent = node.parent + if new_sibling.parent: + new_sibling.parent.rank += 1 + + +cdef void remove(FibonacciNode* node): + # Assumptions: - node is a valid pointer + if node.parent: + node.parent.rank -= 1 + if node.left_sibling: + node.parent.children = node.left_sibling + elif node.right_sibling: + node.parent.children = node.right_sibling + else: + node.parent.children = NULL + + if node.left_sibling: + node.left_sibling.right_sibling = node.right_sibling + if node.right_sibling: + node.right_sibling.left_sibling = node.left_sibling + + node.left_sibling = NULL + node.right_sibling = NULL + node.parent = NULL + +cdef struct FibonacciHeap: + FibonacciNode* min_node + pFibonacciNode[100] roots_by_rank # maximum number of nodes is ~2^100. + + +cdef void insert_node(FibonacciHeap* heap, + FibonacciNode* node): + # Assumptions: - heap is a valid pointer + # - node is a valid pointer + # - node is not the child or sibling of another node + if heap.min_node: + add_sibling(heap.min_node, node) + if node.val < heap.min_node.val: + heap.min_node = node + else: + heap.min_node = node + + +cdef void decrease_val(FibonacciHeap* heap, + FibonacciNode* node, + DTYPE_t newval): + # Assumptions: - heap is a valid pointer + # - newval <= node.val + # - node is a valid pointer + # - node is not the child or sibling of another node + node.val = newval + if node.parent and (node.parent.val >= newval): + remove(node) + insert_node(heap, node) + elif heap.min_node.val > node.val: + heap.min_node = node + + +cdef void link(FibonacciHeap* heap, FibonacciNode* node): + # Assumptions: - heap is a valid pointer + # - node is a valid pointer + # - node is already within heap + + cdef FibonacciNode *linknode + cdef FibonacciNode *parent + cdef FibonacciNode *child + + if heap.roots_by_rank[node.rank] == NULL: + heap.roots_by_rank[node.rank] = node + else: + linknode = heap.roots_by_rank[node.rank] + heap.roots_by_rank[node.rank] = NULL + + if node.val < linknode.val or node == heap.min_node: + remove(linknode) + add_child(node, linknode) + link(heap, node) + else: + remove(node) + add_child(linknode, node) + link(heap, linknode) + + +cdef FibonacciNode* remove_min(FibonacciHeap* heap): + # Assumptions: - heap is a valid pointer + # - heap.min_node is a valid pointer + cdef FibonacciNode *temp + cdef FibonacciNode *temp_right + cdef FibonacciNode *out + cdef unsigned int i + + # make all min_node children into root nodes + if heap.min_node.children: + temp = leftmost_sibling(heap.min_node.children) + temp_right = NULL + + while temp: + temp_right = temp.right_sibling + remove(temp) + add_sibling(heap.min_node, temp) + temp = temp_right + + heap.min_node.children = NULL + + # choose a root node other than min_node + temp = leftmost_sibling(heap.min_node) + if temp == heap.min_node: + if heap.min_node.right_sibling: + temp = heap.min_node.right_sibling + else: + out = heap.min_node + heap.min_node = NULL + return out + + # remove min_node, and point heap to the new min + out = heap.min_node + remove(heap.min_node) + heap.min_node = temp + + # re-link the heap + for i from 0 <= i < 100: + heap.roots_by_rank[i] = NULL + + while temp: + if temp.val < heap.min_node.val: + heap.min_node = temp + temp_right = temp.right_sibling + link(heap, temp) + temp = temp_right + + return out diff --git a/sklearn/utils/graph_shortest_path.pyx b/sklearn/utils/graph_shortest_path.pyx index bbc55a405bb23..bd7227560353e 100644 --- a/sklearn/utils/graph_shortest_path.pyx +++ b/sklearn/utils/graph_shortest_path.pyx @@ -3,7 +3,7 @@ Routines for performing shortest-path graph searches The main interface is in the function `graph_shortest_path`. This calls cython routines that compute the shortest path using either -the Floyd-Warshall algorithm, or Dykstra's algorithm with Fibonacci Heaps. +the Floyd-Warshall algorithm, or Dijkstra's algorithm with Fibonacci Heaps. """ # Author: Jake Vanderplas -- @@ -20,6 +20,12 @@ from libc.stdlib cimport malloc, free np.import_array() +cimport fibonacci +from fibonacci cimport (FibonacciHeap, FibonacciNode, initialize_node, + rightmost_sibling, leftmost_sibling, add_child, + add_sibling, remove, insert_node, decrease_val, + link, remove_min) + DTYPE = np.float64 ctypedef np.float64_t DTYPE_t @@ -235,220 +241,6 @@ cdef np.ndarray dijkstra(dist_matrix, return graph -###################################################################### -# FibonacciNode structure -# This structure and the operations on it are the nodes of the -# Fibonacci heap. -# - -cdef struct FibonacciNode: - unsigned int index - unsigned int rank - unsigned int state - DTYPE_t val - FibonacciNode* parent - FibonacciNode* left_sibling - FibonacciNode* right_sibling - FibonacciNode* children - - -cdef void initialize_node(FibonacciNode* node, - unsigned int index, - DTYPE_t val=0): - # Assumptions: - node is a valid pointer - # - node is not currently part of a heap - node.index = index - node.val = val - node.rank = 0 - node.state = 0 # 0 -> NOT_IN_HEAP - - node.parent = NULL - node.left_sibling = NULL - node.right_sibling = NULL - node.children = NULL - - -cdef FibonacciNode* rightmost_sibling(FibonacciNode* node): - # Assumptions: - node is a valid pointer - cdef FibonacciNode* temp = node - while(temp.right_sibling): - temp = temp.right_sibling - return temp - - -cdef FibonacciNode* leftmost_sibling(FibonacciNode* node): - # Assumptions: - node is a valid pointer - cdef FibonacciNode* temp = node - while(temp.left_sibling): - temp = temp.left_sibling - return temp - - -cdef void add_child(FibonacciNode* node, FibonacciNode* new_child): - # Assumptions: - node is a valid pointer - # - new_child is a valid pointer - # - new_child is not the sibling or child of another node - new_child.parent = node - - if node.children: - add_sibling(node.children, new_child) - else: - node.children = new_child - new_child.right_sibling = NULL - new_child.left_sibling = NULL - node.rank = 1 - - -cdef void add_sibling(FibonacciNode* node, FibonacciNode* new_sibling): - # Assumptions: - node is a valid pointer - # - new_sibling is a valid pointer - # - new_sibling is not the child or sibling of another node - cdef FibonacciNode* temp = rightmost_sibling(node) - temp.right_sibling = new_sibling - new_sibling.left_sibling = temp - new_sibling.right_sibling = NULL - new_sibling.parent = node.parent - if new_sibling.parent: - new_sibling.parent.rank += 1 - - -cdef void remove(FibonacciNode* node): - # Assumptions: - node is a valid pointer - if node.parent: - node.parent.rank -= 1 - if node.left_sibling: - node.parent.children = node.left_sibling - elif node.right_sibling: - node.parent.children = node.right_sibling - else: - node.parent.children = NULL - - if node.left_sibling: - node.left_sibling.right_sibling = node.right_sibling - if node.right_sibling: - node.right_sibling.left_sibling = node.left_sibling - - node.left_sibling = NULL - node.right_sibling = NULL - node.parent = NULL - - -###################################################################### -# FibonacciHeap structure -# This structure and operations on it use the FibonacciNode -# routines to implement a Fibonacci heap - -ctypedef FibonacciNode* pFibonacciNode - - -cdef struct FibonacciHeap: - FibonacciNode* min_node - pFibonacciNode[100] roots_by_rank # maximum number of nodes is ~2^100. - - -cdef void insert_node(FibonacciHeap* heap, - FibonacciNode* node): - # Assumptions: - heap is a valid pointer - # - node is a valid pointer - # - node is not the child or sibling of another node - if heap.min_node: - add_sibling(heap.min_node, node) - if node.val < heap.min_node.val: - heap.min_node = node - else: - heap.min_node = node - - -cdef void decrease_val(FibonacciHeap* heap, - FibonacciNode* node, - DTYPE_t newval): - # Assumptions: - heap is a valid pointer - # - newval <= node.val - # - node is a valid pointer - # - node is not the child or sibling of another node - node.val = newval - if node.parent and (node.parent.val >= newval): - remove(node) - insert_node(heap, node) - elif heap.min_node.val > node.val: - heap.min_node = node - - -cdef void link(FibonacciHeap* heap, FibonacciNode* node): - # Assumptions: - heap is a valid pointer - # - node is a valid pointer - # - node is already within heap - - cdef FibonacciNode *linknode - cdef FibonacciNode *parent - cdef FibonacciNode *child - - if heap.roots_by_rank[node.rank] == NULL: - heap.roots_by_rank[node.rank] = node - else: - linknode = heap.roots_by_rank[node.rank] - heap.roots_by_rank[node.rank] = NULL - - if node.val < linknode.val or node == heap.min_node: - remove(linknode) - add_child(node, linknode) - link(heap, node) - else: - remove(node) - add_child(linknode, node) - link(heap, linknode) - - -cdef FibonacciNode* remove_min(FibonacciHeap* heap): - # Assumptions: - heap is a valid pointer - # - heap.min_node is a valid pointer - cdef FibonacciNode *temp - cdef FibonacciNode *temp_right - cdef FibonacciNode *out - cdef unsigned int i - - # make all min_node children into root nodes - if heap.min_node.children: - temp = leftmost_sibling(heap.min_node.children) - temp_right = NULL - - while temp: - temp_right = temp.right_sibling - remove(temp) - add_sibling(heap.min_node, temp) - temp = temp_right - - heap.min_node.children = NULL - - # choose a root node other than min_node - temp = leftmost_sibling(heap.min_node) - if temp == heap.min_node: - if heap.min_node.right_sibling: - temp = heap.min_node.right_sibling - else: - out = heap.min_node - heap.min_node = NULL - return out - - # remove min_node, and point heap to the new min - out = heap.min_node - remove(heap.min_node) - heap.min_node = temp - - # re-link the heap - for i from 0 <= i < 100: - heap.roots_by_rank[i] = NULL - - while temp: - if temp.val < heap.min_node.val: - heap.min_node = temp - temp_right = temp.right_sibling - link(heap, temp) - temp = temp_right - - return out - - ###################################################################### # Debugging: Functions for printing the fibonacci heap #