Source code for Analytics

import copy
import queue
from typing import List, Dict, Tuple
import warnings

import networkx as nx
import numpy as np
from numpy import linalg

import numba
from numba import jit, prange

try:
    from Solver import Solver
except ImportError:
    from .Solver import Solver


[docs]class Analytics:
@staticmethod
def get_neighbour_matrix(nxg: nx.Graph):
    """
    Deprecated alias for ``Analytics.get_adjacency_matrix(nxg, True)``.

    Emits a ``DeprecationWarning`` and forwards the call with self assignment enabled.

    :param nxg: networkx graph object.
    :type nxg: nx.Graph
    :return: The value returned by ``Analytics.get_adjacency_matrix(nxg, True)``.
    """
    # stacklevel=2 attributes the warning to the caller instead of this wrapper.
    warnings.warn(
        "Function depreciated, please use get_adjacency_matrix(nxg, True) instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return Analytics.get_adjacency_matrix(nxg, True)
@staticmethod
def get_adjacency_matrix(nxg: nx.Graph, self_assignment=False) -> List[List[int]]:
    """
    Creates an adjacency matrix for the given graph. Each row represents a node, and each
    column value is 1 if there is an edge between the row node and the column node, else 0.

    Rows and columns are both ordered by the sorted node ids, so entry ``[i][j]`` always
    refers to the i-th and j-th node in sorted order.

    :param nxg: networkx bi-directional graph object.
    :type nxg: nx.Graph
    :param self_assignment: Whether or not to put a 1 on the diagonal (node connected to
        itself). Used for convergence rate.
    :type self_assignment: bool
    :return A: List of rows, representing the adjacency matrix.
    :rtype: List[List[int]]
    """
    # The sorted order is the single source of truth for both rows and columns.
    s_nodes = sorted(nxg.nodes())

    # Constant-time position lookup instead of repeated list.index() scans.
    index_of = {node: position for position, node in enumerate(s_nodes)}

    dim = len(s_nodes)
    mx = []

    # Iterate in sorted order so that row i matches column i.
    for node in s_nodes:
        row = [0] * dim

        if self_assignment:
            row[index_of[node]] = 1

        for neighbour in nxg.neighbors(node):
            row[index_of[neighbour]] = 1

        mx.append(row)

    return mx
@staticmethod
def get_stochastic_neighbour_matrix(nxg: nx.Graph = None,
                                    adjacency_matrix: List[List[int]] = None) -> List[List[float]]:
    """
    Builds a row-normalised version of an adjacency matrix: every cell is divided by the
    sum of its row, so an existing edge becomes 1/(row sum) and a missing edge stays 0.

    When no adjacency matrix is passed, it is obtained from ``nxg`` through
    ``Analytics.get_adjacency_matrix(nxg, True)``.

    :param nxg: Networkx bi-directional graph object.
    :type nxg: nx.Graph
    :param adjacency_matrix: Self assigned adjacency matrix.
    :type adjacency_matrix: List[List[int]]
    :return A: List of rows, each row divided by its own sum.
    :rtype: List[List[float]]
    :raises ValueError: If neither ``nxg`` nor ``adjacency_matrix`` is provided.
    """
    if adjacency_matrix is None:
        if nxg is None:
            raise ValueError('At least one parameter of nxg or adjacency_matrix needs to be provided')

        # No matrix supplied: derive it from the graph.
        adjacency_matrix = Analytics.get_adjacency_matrix(nxg, True)

    stochastic_rows = []
    for row in adjacency_matrix:
        total = sum(row)
        stochastic_rows.append([cell / total for cell in row])

    return stochastic_rows
@staticmethod
def get_eigenvalues(mx: List[List[float]], symmetrical: bool = False) -> np.ndarray:
    """
    Computes the eigenvalues of a matrix.

    :param mx: A matrix made up of nested lists.
    :param symmetrical: Whether or not the matrix is symmetrical. If true, the
        symmetric solver (``eigvalsh``) is used instead of the general one.
    :return: The eigenvalues of the provided matrix. For the general solver only
        the real parts are returned.
    :rtype: np.ndarray
    """
    if not symmetrical:
        # The general solver may produce complex values; keep the real parts only.
        return np.real(linalg.eigvals(mx))

    return linalg.eigvalsh(mx)
@staticmethod
def second_largest(numbers: List[float], sorted_list: bool = False) -> float:
    """
    Returns the 2nd largest number in a list of numbers. Duplicates count as separate
    entries, so the 2nd largest of ``[5, 5]`` is ``5``.

    :param numbers: A list of numbers
    :param sorted_list: If the list is already sorted in ascending order
    :return: The 2nd largest number in the list numbers, or None when the list holds
        fewer than two numbers
    :rtype: float
    """
    if sorted_list:
        # Guard the short cases so both code paths agree: fewer than two values
        # means there is no second largest.
        return numbers[-2] if len(numbers) >= 2 else None

    count = 0
    largest = runner_up = float('-inf')

    for x in numbers:
        count += 1
        if x > runner_up:
            if x >= largest:
                # New maximum: the old maximum becomes the runner-up.
                largest, runner_up = x, largest
            else:
                runner_up = x

    return runner_up if count >= 2 else None
@staticmethod
@jit(nopython=True)
def second_largest_cuda(numbers: List[float]) -> float:
    """
    Returns the 2nd largest number in a collection of numbers, compiled with numba's
    ``jit(nopython=True)``. Duplicates count as separate entries.

    :param numbers: A list of numbers
    :return: The 2nd largest number in the list numbers, or None when fewer than two
        numbers were provided
    :rtype: float
    """
    count = 0

    # Start from negative infinity rather than a fixed finite sentinel, so inputs
    # of any magnitude are handled correctly.
    m1 = m2 = -np.inf

    for x in numbers:
        count += 1
        if x > m2:
            if x >= m1:
                # New maximum: the old maximum becomes the runner-up.
                m1, m2 = x, m1
            else:
                m2 = x

    return m2 if count >= 2 else None
@staticmethod
def second_smallest(numbers: List[float], sorted_list: bool = False) -> float:
    """
    Returns the 2nd smallest number in a list of numbers. Duplicates count as separate
    entries, so the 2nd smallest of ``[4, 4]`` is ``4``.

    :param numbers: A list of numbers
    :param sorted_list: If the list is already sorted in ascending order
    :return: The 2nd smallest number in the list numbers, or None when the list holds
        fewer than two numbers
    :rtype: float
    """
    if sorted_list:
        # Guard the short cases so both code paths agree: fewer than two values
        # means there is no second smallest.
        return numbers[1] if len(numbers) >= 2 else None

    count = 0
    smallest = runner_up = float('inf')

    for x in numbers:
        count += 1
        if x < runner_up:
            if x <= smallest:
                # New minimum: the old minimum becomes the runner-up.
                smallest, runner_up = x, smallest
            else:
                runner_up = x

    return runner_up if count >= 2 else None
@staticmethod
def convergence_rate(nxg: nx.Graph = None, stochastic_neighbour_matrix: List[List[float]] = None) -> float:
    """
    Returns the 2nd largest eigenvalue of a stochastic neighbour matrix.

    The matrix is either passed in directly or built from ``nxg`` with
    ``Analytics.get_stochastic_neighbour_matrix``.

    :param nxg: networkx bi-directional graph object
    :type nxg: nx.Graph
    :param stochastic_neighbour_matrix: The stochastic neighbour matrix of the given graph.
    :type stochastic_neighbour_matrix: List[List[float]]
    :return: The 2nd largest eigenvalue of the stochastic neighbour matrix
    :rtype: float
    :raises ValueError: If neither ``nxg`` nor ``stochastic_neighbour_matrix`` is provided.
    """
    matrix = stochastic_neighbour_matrix

    if matrix is None:
        if nxg is None:
            raise ValueError('At least one parameter of nxg or stochastic_neighbour_matrix needs to be provided')

        # No matrix supplied: derive it from the graph.
        matrix = Analytics.get_stochastic_neighbour_matrix(nxg)

    eigenvalues = Analytics.get_eigenvalues(matrix)
    return Analytics.second_largest_cuda(eigenvalues)
@staticmethod
@jit(nopython=True)
def convergence_rate_cuda(neighbour_matrix: np.ndarray) -> float:
    """
    Returns the 2nd largest eigenvalue (real part) of the row-normalised neighbour
    matrix, compiled with numba's ``jit(nopython=True)``.

    :param neighbour_matrix: Square neighbour (adjacency) matrix as a 2-D array.
    :type neighbour_matrix: np.ndarray
    :return: The 2nd largest eigenvalue, or None when there are fewer than two
        eigenvalues
    :rtype: float
    """
    row_sums = neighbour_matrix.sum(axis=1)

    # Reshape the sums into a column so the division broadcasts per row. Dividing by
    # the flat vector would divide each column by the sum of a different row.
    stochastic = neighbour_matrix / row_sums.reshape((row_sums.shape[0], 1))

    eigenvalues = np.real(linalg.eigvals(stochastic))

    count = 0

    # Negative infinity works for any magnitude, unlike a fixed finite sentinel.
    m1 = m2 = -np.inf

    for x in eigenvalues:
        count += 1
        if x > m2:
            if x >= m1:
                # New maximum: the old maximum becomes the runner-up.
                m1, m2 = x, m1
            else:
                m2 = x

    return m2 if count >= 2 else None
@staticmethod
def convergence_rate2(nxg: nx.Graph) -> float:
    """
    Alternate convergence rate, computed from the eigenvalues of the stochastic
    neighbour matrix of the graph.

    The result is the larger of ``largest - |second largest|`` and
    ``largest - |smallest|``.

    :param nxg: networkx bi-directional graph object
    :type nxg: nx.Graph
    :return: Alternate convergence rate
    :rtype: float
    """
    ev = Analytics.get_eigenvalues(Analytics.get_stochastic_neighbour_matrix(nxg))

    largest = max(ev)
    gap_to_smallest = largest - abs(min(ev))
    gap_to_second = largest - abs(Analytics.second_largest(ev))

    return max(gap_to_second, gap_to_smallest)
@staticmethod
def total_edge_cost(nxg: nx.Graph) -> int:
    """
    Sums the ``'weight'`` attribute over all edges of the given graph. Edges without
    a ``'weight'`` attribute contribute nothing.

    :param nxg: A networkx object with nodes and edges.
    :type nxg: nx.Graph
    :return: The total cost of all edges in the graph.
    :rtype: float
    """
    return sum(
        attributes['weight']
        for _, _, attributes in nxg.edges(data=True)
        if 'weight' in attributes
    )
@staticmethod
def hypothetical_max_edge_cost(nxg: nx.Graph) -> float:
    """
    Calculates the total edge cost the graph would have if it were complete.

    Works on a deep copy that is passed through ``Solver.complete``, so the graph
    handed in by the caller is not the object given to the solver.

    :rtype: float
    :param nxg: The graph to calculate the hypothetical edge cost of.
    :return: The total edge cost if the graph were complete.
    """
    completed = Solver.complete(copy.deepcopy(nxg))
    return Analytics.total_edge_cost(completed)
@staticmethod
def get_distance_distribution(nxg: nx.Graph) -> Dict[int, int]:
    """
    Deprecated. Builds the distribution of the longest shortest path found from every
    node in the graph: for each node, the largest shortest-path length to any node it
    can reach is determined, and the occurrences of each such length are counted.

    :rtype: Dict[int, int]
    :param nxg: A given graph with edges.
    :return: A dict mapping a longest-shortest-path length to the number of nodes
        having that length.
    """
    # stacklevel=2 attributes the warning to the caller instead of this function.
    warnings.warn(
        "Function depreciated, please use get_eccentricity_distribution(nxg) instead",
        DeprecationWarning,
        stacklevel=2,
    )

    distributions = {}

    for _origin, lengths in nx.all_pairs_shortest_path_length(nxg):
        # Take the maximum over the distances actually reported for this origin,
        # which works for any node labels (not only 0..n-1).
        max_node_distance = max(lengths.values(), default=-1)

        # -1 means no distance was reported for this origin; skip it.
        if max_node_distance != -1:
            distributions[max_node_distance] = distributions.get(max_node_distance, 0) + 1

    return distributions
@staticmethod
def get_eccentricity_distribution(nxg: nx.Graph) -> Dict[int, int]:
    """
    Counts how many nodes share each eccentricity value, using ``nx.eccentricity``.

    :rtype: Dict[int, int]
    :param nxg: A given graph with edges.
    :return: A dict mapping an eccentricity to the number of nodes having it.
    """
    distributions = {}

    for eccentricity in nx.eccentricity(nxg).values():
        # Start at zero for a value not seen before, then count this node.
        distributions[eccentricity] = distributions.get(eccentricity, 0) + 1

    return distributions
@staticmethod
@jit(nopython=True)
def is_nodes_connected_cuda(mx: np.ndarray, origin: int, destination: int):
    """
    Checks whether ``destination`` can be reached from ``origin`` by following non-zero
    entries of the matrix ``mx``, compiled with numba's ``jit(nopython=True)``.

    ``mx[a, b] != 0`` is treated as an edge from index ``a`` to index ``b``; diagonal
    entries are ignored.

    :param mx: Square matrix indexed as ``mx[row, column]``.
    :param origin: Row/column index to start from.
    :param destination: Row/column index to reach.
    :return: True if a path exists (or both indices are equal), otherwise False.
    """
    # A node is trivially connected to itself; this matches is_nodes_connected.
    if origin == destination:
        return True

    size = len(mx)

    # Mark nodes as seen when they are pushed, so each node is expanded only once.
    seen = set()
    seen.add(origin)
    pending = [origin]

    while len(pending) > 0:
        current = pending.pop()

        for i in range(0, size):
            if mx[current, i] != 0 and i != current:
                if i == destination:
                    return True
                if i not in seen:
                    seen.add(i)
                    pending.append(i)

    return False
@staticmethod
def is_nodes_connected(nxg: nx.Graph, origin: int, destination: int) -> bool:
    """
    Checks if two nodes are connected with each other using a BFS approach.

    :param nxg: The graph that contains the two nodes.
    :param origin: The origin node id to check from.
    :param destination: The destination node to check the connectivity to.
    :return: True if there's a connection between the nodes, otherwise False.
    """
    from collections import deque

    # A node is trivially connected to itself.
    if origin == destination:
        return True

    # Nodes are marked as seen when enqueued, so each node is queued and expanded
    # at most once.
    seen = {origin}
    pending = deque([origin])

    while pending:
        node_id = pending.popleft()

        # Each edge is yielded as (node_id, neighbour); only the far end matters.
        for _, neighbour in nxg.edges(node_id):
            if neighbour == destination:
                return True
            if neighbour not in seen:
                seen.add(neighbour)
                pending.append(neighbour)

    # Every reachable node was visited without finding the destination.
    return False
@staticmethod
def get_node_dict(nxg: nx.Graph) -> Dict[int, Tuple[int, int]]:
    """
    Converts a networkx object to a dict mapping every node to its ``(x, y)`` position,
    read from the node attributes ``'x'`` and ``'y'``.
    Can be used to recreate a new graph with Creator.from_dict().

    :rtype: Dict[int, Tuple[int, int]]
    :param nxg: The graph to get the nodes from.
    :return: A dict of nodes with their corresponding positions.
    """
    return {
        node_id: (attributes['x'], attributes['y'])
        for node_id, attributes in nxg.nodes(data=True)
    }
@staticmethod
def get_edge_dict(nxg: nx.Graph) -> Dict[int, List[int]]:
    """
    Converts a networkx object to a dict that maps the first endpoint of every edge to
    the list of second endpoints, in the order ``nxg.edges()`` yields them.
    Can be used to recreate a new graph with Creator.from_dict().

    :rtype: Dict[int, List[int]]
    :param nxg: The graph to get the edges from.
    :return: A neighbour list keyed by the first endpoint of each edge.
    """
    edges = {}

    for origin, dest in nxg.edges():
        # Create the list on first sight of this origin, then record the edge.
        edges.setdefault(origin, []).append(dest)

    return edges
@staticmethod
def get_average_eccentricity(nxg: nx.Graph) -> float:
    """
    Calculates the average eccentricity of the given graph as the weighted mean of the
    distribution returned by ``Analytics.get_eccentricity_distribution``.

    :rtype: float
    :param nxg: The graph to get the average eccentricity from.
    :return: The average eccentricity of the graph.
    """
    distribution = Analytics.get_eccentricity_distribution(nxg)

    # Each eccentricity weighted by how many nodes have it.
    weighted_total = sum(eccentricity * frequency for eccentricity, frequency in distribution.items())
    node_count = sum(distribution.values())

    return weighted_total / node_count
@staticmethod
def get_degree_matrix(nxg: nx.Graph) -> List[List[int]]:
    """
    Creates the degree matrix of the given graph: a square matrix that is zero
    everywhere except on the diagonal, which holds the degree of each node.

    Rows and columns are ordered by the sorted node ids.

    :param nxg: The graph to get the degree matrix from.
    :type nxg: nx.Graph
    :return: List of rows, representing the degree matrix.
    :rtype: List[List[int]]
    """
    # The sorted order defines both the row order and the diagonal position.
    s_nodes = sorted(nxg.nodes())
    dim = len(s_nodes)

    mx = []

    for node_index, node in enumerate(s_nodes):
        row = [0] * dim

        # Look up the degree by node id; the index only positions the value.
        row[node_index] = nx.degree(nxg, node)

        mx.append(row)

    return mx
@staticmethod
def get_laplacian_matrix(nxg: nx.Graph) -> List[List[int]]:
    """
    Calculates the laplacian matrix of a given graph as the cell-wise difference
    between its degree matrix and its adjacency matrix.

    :param nxg: The graph to get the laplacian matrix from.
    :return: The laplacian matrix, such as L = D - A where D = Degree matrix and A = Adjacency matrix
    """
    degree = Analytics.get_degree_matrix(nxg)
    adjacency = Analytics.get_adjacency_matrix(nxg)

    # The adjacency matrix determines the size of the result.
    size = len(adjacency)

    return [
        [degree[r][c] - adjacency[r][c] for c in range(size)]
        for r in range(size)
    ]
@staticmethod
def is_graph_connected(laplacian_matrix: List[List[int]]) -> bool:
    """
    Checks whether a given graph is connected based on its laplacian matrix, by testing
    whether the second smallest eigenvalue of the matrix is above a small threshold.

    :param laplacian_matrix: The laplacian matrix, representing the graph.
    :return: Whether it's connected or not.
    """
    # A 1x1 matrix has only one eigenvalue, so there is no second smallest one to
    # inspect; a graph with a single node is treated as connected.
    if len(laplacian_matrix) == 1:
        return True

    ev = Analytics.get_eigenvalues(laplacian_matrix, symmetrical=True)

    # The eigenvalues are handed over as an already sorted list.
    second_smallest = Analytics.second_smallest(ev, True)

    # Compare against a threshold instead of zero due to floating point errors.
    return bool(second_smallest > 1e-8)