Source code for CompSensePack.dictionaries.MOD

"""
MOD.py - Method of Optimal Directions (MOD) algorithm for dictionary learning.

This module implements the MOD algorithm, which is used for learning a dictionary for sparse coding.
It includes utility functions to measure the distance between dictionaries and to learn a dictionary
that can sparsely represent a set of signals using the OMP algorithm.

Example usage:
--------------
>>> from MOD import MOD, I_findDistanceBetweenDictionaries
>>> data = np.random.randn(100, 200)  # Random signals
>>> params = {'K': 50, 'num_iterations': 10, 'initialization_method': 'DataElements', 'L': 5}
>>> dictionary, coefficients = MOD(data, params)

>>> # Distance between original and updated dictionary
>>> ratio, total_distance = I_findDistanceBetweenDictionaries(dictionary, new_dictionary)
"""

# system imports
import numpy as np
import scipy.sparse as sp
from scipy.linalg import solve
from .OMP import OMP  # Import OMP algorithm from the OMP module

[docs] def I_findDistanceBetweenDictionaries(original, new): """ Calculates the distance between two dictionaries. This function compares two dictionaries by calculating the number of columns that have a very low error between the original and the new dictionary. Parameters ---------- original : numpy.ndarray The original dictionary. new : numpy.ndarray The new dictionary. Returns ------- ratio : float The ratio of dictionary elements with a small error (i.e., those that satisfy the condition `errorOfElement < 0.01`). totalDistances : float The sum of all error values between the original and new dictionary elements. Notes ----- - The function modifies the new dictionary to ensure that each column starts with a positive value (consistent with the original dictionary). Example ------- >>> original_dict = np.random.randn(100, 50) >>> new_dict = np.random.randn(100, 50) >>> ratio, total_distance = I_findDistanceBetweenDictionaries(original_dict, new_dict) """ catchCounter = 0 totalDistances = 0 # Ensure columns in new dictionary start with positive values for i in range(new.shape[1]): new[:, i] = np.sign(new[0, i]) * new[:, i] for i in range(original.shape[1]): d = np.sign(original[0, i]) * original[:, i] distances = np.sum(new - np.tile(d, (1, new.shape[1])), axis=0) index = np.argmin(distances) errorOfElement = 1 - np.abs(new[:, index].T @ d) totalDistances += errorOfElement catchCounter += errorOfElement < 0.01 ratio = catchCounter / original.shape[1] return ratio, totalDistances
[docs] def MOD(data, parameters): """ Method of Optimal Directions (MOD) algorithm for dictionary learning. The MOD algorithm is a method for learning a dictionary for sparse representation of signals. It iteratively updates the dictionary to best represent the input data with sparse coefficients using the Orthogonal Matching Pursuit (OMP) algorithm. Parameters ---------- data : numpy.ndarray An (n x N) matrix containing N signals, each of dimension n. parameters : dict A dictionary containing the parameters for the MOD algorithm: - K : int The number of dictionary elements (columns) to train. - num_iterations : int The number of iterations to perform for dictionary learning. - initialization_method : str Method to initialize the dictionary. Options are: * 'DataElements' - Initializes the dictionary using the first K data signals. * 'GivenMatrix' - Initializes the dictionary using a provided matrix (requires 'initial_dictionary' key). - initial_dictionary : numpy.ndarray, optional The initial dictionary matrix to use if 'initialization_method' is set to 'GivenMatrix'. It should be of size (n x K). - L : int The number of non-zero coefficients to use in OMP for sparse representation of each signal. Returns ------- dictionary : numpy.ndarray The trained dictionary of size (n x K), where each column is a dictionary element. coef_matrix : numpy.ndarray The coefficient matrix of size (K x N), representing the sparse representation of the input data using the trained dictionary. Notes ----- - The dictionary is initialized either from the data signals or from a provided matrix. - The OMP algorithm is used to solve the sparse coding step at each iteration. - The algorithm iteratively refines the dictionary by solving a least squares problem. - The columns of the dictionary are normalized to unit norm and positive first elements. Example ------- >>> data = np.random.randn(100, 200) >>> params = {'K': 50, 'num_iterations': 10, 'initialization_method': 'DataElements', 'L': 5} >>> dictionary, coefficients = MOD(data, params) """ # Check if the number of signals is smaller than the dictionary size if data.shape[1] < parameters['K']: print("MOD: number of training signals is smaller than the dictionary size. Returning trivial solution...") dictionary = data[:, :data.shape[1]] coef_matrix = np.eye(data.shape[1]) # Trivial coefficients return dictionary, coef_matrix # Initialize dictionary based on the specified method if parameters['initialization_method'] == 'DataElements': dictionary = data[:, :parameters['K']] elif parameters['initialization_method'] == 'GivenMatrix': if 'initial_dictionary' not in parameters: raise ValueError("initial_dictionary parameter is required when " "initialization_method is set to 'GivenMatrix'.") dictionary = parameters['initial_dictionary'] else: raise ValueError( "Invalid value for initialization_method. Choose 'DataElements' or 'GivenMatrix'.") # Convert to float64 for precision dictionary = dictionary.astype(np.float64) # Normalize dictionary columns and avoid division by zero column_norms = np.linalg.norm(dictionary, axis=0) column_norms[column_norms < 1e-10] = 1 # Prevent division by zero dictionary /= column_norms # Ensure positive first elements dictionary *= np.sign(dictionary[0, :]) prev_dictionary = dictionary.copy() # Run MOD algorithm for iter_num in range(parameters['num_iterations']): # Step 1: Sparse coding using OMP coef_matrix = OMP(dictionary, data, parameters['L']) # Step 2: Update the dictionary regularization_term = 1e-7 * sp.eye(coef_matrix.shape[0]) matrix_a = coef_matrix @ coef_matrix.T + regularization_term.toarray() # Use pinv for numerical stability: "lstsq" or "regularizations" could also work dictionary = data @ coef_matrix.T @ np.linalg.pinv(matrix_a) # Normalize dictionary columns and avoid division by zero column_norms = np.linalg.norm(dictionary, axis=0) column_norms[column_norms < 1e-10] = 1 # Prevent division by zero dictionary /= column_norms # Ensure positive first elements dictionary *= np.sign(dictionary[0, :]) # Convergence check if np.linalg.norm(dictionary - prev_dictionary) < 1e-5: print(f"MOD converged after {iter_num + 1} iterations.") break prev_dictionary = dictionary.copy() return dictionary, coef_matrix