diff --git a/docs/source/gann.rst b/docs/source/gann.rst index 8b243e71..821be344 100644 --- a/docs/source/gann.rst +++ b/docs/source/gann.rst @@ -535,7 +535,7 @@ value (i.e. accuracy) of 100 is reached after around 180 generations. ga_instance.plot_fitness() -.. figure:: https://user-images.githubusercontent.com/16560492/82078638-c11e0700-96e1-11ea-8aa9-c36761c5e9c7.png +.. image:: https://user-images.githubusercontent.com/16560492/82078638-c11e0700-96e1-11ea-8aa9-c36761c5e9c7.png :alt: By running the code again, a different initial population is created and @@ -930,7 +930,7 @@ The number of wrong classifications is only 1 and the accuracy is The next figure shows how fitness value evolves by generation. -.. figure:: https://user-images.githubusercontent.com/16560492/82152993-21898180-9865-11ea-8387-b995f88b83f7.png +.. image:: https://user-images.githubusercontent.com/16560492/82152993-21898180-9865-11ea-8387-b995f88b83f7.png :alt: Regression Example 1 @@ -998,10 +998,10 @@ for regression. GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) - print("Change = {change}".format(change=ga_instance.best_solution()[1] - last_fitness)) + print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) + print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) - last_fitness = ga_instance.best_solution()[1].copy() + last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1].copy() # Holds the fitness value of the previous generation. last_fitness = 0 @@ -1011,8 +1011,8 @@ for regression. [8, 15, 20, 13]]) # Preparing the NumPy array of the outputs. 
- data_outputs = numpy.array([0.1, - 1.5]) + data_outputs = numpy.array([[0.1, 0.2], + [1.8, 1.5]]) # The length of the input vector for each sample (i.e. number of neurons in the input layer). num_inputs = data_inputs.shape[1] @@ -1022,7 +1022,7 @@ for regression. GANN_instance = pygad.gann.GANN(num_solutions=num_solutions, num_neurons_input=num_inputs, num_neurons_hidden_layers=[2], - num_neurons_output=1, + num_neurons_output=2, hidden_activations=["relu"], output_activation="None") @@ -1071,7 +1071,7 @@ for regression. ga_instance.plot_fitness() # Returning the details of the best solution. - solution, solution_fitness, solution_idx = ga_instance.best_solution() + solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) print("Parameters of the best solution : {solution}".format(solution=solution)) print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) @@ -1092,7 +1092,7 @@ for regression. The next figure shows how the fitness value changes for the generations used. -.. figure:: https://user-images.githubusercontent.com/16560492/92948154-3cf24b00-f459-11ea-94ea-952b66ab2145.png +.. image:: https://user-images.githubusercontent.com/16560492/92948154-3cf24b00-f459-11ea-94ea-952b66ab2145.png :alt: Regression Example 2 - Fish Weight Prediction @@ -1164,15 +1164,15 @@ Here is the complete code. 
GANN_instance.update_population_trained_weights(population_trained_weights=population_matrices) print("Generation = {generation}".format(generation=ga_instance.generations_completed)) - print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1])) - print("Change = {change}".format(change=ga_instance.best_solution()[1] - last_fitness)) + print("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) + print("Change = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness)) - last_fitness = ga_instance.best_solution()[1].copy() + last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1].copy() # Holds the fitness value of the previous generation. last_fitness = 0 - data = numpy.array(pandas.read_csv("Fish.csv")) + data = numpy.array(pandas.read_csv("../data/Fish.csv")) # Preparing the NumPy array of the inputs. data_inputs = numpy.asarray(data[:, 2:], dtype=numpy.float32) @@ -1237,7 +1237,7 @@ Here is the complete code. ga_instance.plot_fitness() # Returning the details of the best solution. - solution, solution_fitness, solution_idx = ga_instance.best_solution() + solution, solution_fitness, solution_idx = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness) print("Parameters of the best solution : {solution}".format(solution=solution)) print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness)) print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx)) @@ -1258,5 +1258,5 @@ Here is the complete code. The next figure shows how the fitness value changes for the 500 generations used. -.. figure:: https://user-images.githubusercontent.com/16560492/92948486-bbe78380-f459-11ea-9e31-0d4c7269d606.png +.. 
image:: https://user-images.githubusercontent.com/16560492/92948486-bbe78380-f459-11ea-9e31-0d4c7269d606.png :alt: diff --git a/docs/source/pygad.rst b/docs/source/pygad.rst index 73d1ef6a..83d02969 100644 --- a/docs/source/pygad.rst +++ b/docs/source/pygad.rst @@ -4136,6 +4136,149 @@ and also saved in the text file. 2023-04-03 19:04:27 INFO: Generation = 10 2023-04-03 19:04:27 INFO: Fitness = 0.000389832593101348 +Solve Non-Deterministic Problems +================================ + +PyGAD can be used to solve both deterministic and non-deterministic +problems. Deterministic problems are those that return the same fitness for the +same solution. For non-deterministic problems, a different fitness value +would be returned for the same solution. + +By default, PyGAD settings are set to solve deterministic problems. +PyGAD can save the explored solutions and their fitness to reuse in the +future. These instance attributes can save the solutions: + +1. ``solutions``: Exists if ``save_solutions=True``. + +2. ``best_solutions``: Exists if ``save_best_solutions=True``. + +3. ``last_generation_elitism``: Exists if ``keep_elitism`` > 0. + +4. ``last_generation_parents``: Exists if ``keep_parents`` > 0 or + ``keep_parents=-1``. + +To configure PyGAD for non-deterministic problems, we have to disable +saving the previous solutions. This is done by setting these parameters: + +1. ``keep_elitism=0`` + +2. ``keep_parents=0`` + +3. ``save_solutions=False`` + +4. ``save_best_solutions=False`` + +.. code:: python + + import pygad + ... + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...) + +This way PyGAD will not save any explored solution and thus the fitness +function has to be called for each individual solution.
+ +Reuse the Fitness instead of Calling the Fitness Function +========================================================= + +It may happen that a previously explored solution in generation X is +explored again in another generation Y (where Y > X). For some problems, +calling the fitness function takes much time. + +For deterministic problems, it is better to not call the fitness +function for an already explored solution. Instead, reuse the fitness +of the old solution. PyGAD supports some options to help you save time +calling the fitness function for a previously explored solution. + +The parameters explored in this section can be set in the constructor of +the ``pygad.GA`` class. + +The ``cal_pop_fitness()`` method of the ``pygad.GA`` class checks these +parameters to see if there is a possibility of reusing the fitness +instead of calling the fitness function. + +.. _1-savesolutions: + +1. ``save_solutions`` +--------------------- + +It defaults to ``False``. If set to ``True``, then the population of +each generation is saved into the ``solutions`` attribute of the +``pygad.GA`` instance. In other words, every single solution is saved in +the ``solutions`` attribute. + +.. _2-savebestsolutions: + +2. ``save_best_solutions`` +-------------------------- + +It defaults to ``False``. If ``True``, then it only saves the best +solution in every generation. + +.. _3-keepelitism: + +3. ``keep_elitism`` +------------------- + +It accepts an integer and defaults to 1. If set to a positive integer, +then it keeps the elitism of one generation available in the next +generation. + +.. _4-keepparents: + +4. ``keep_parents`` +------------------- + +It accepts an integer and defaults to -1. If set to ``-1`` or a positive +integer, then it keeps the parents of one generation available in the +next generation. + +Why the Fitness Function is not Called for Solution at Index 0?
+=============================================================== + +PyGAD has a parameter called ``keep_elitism`` which defaults to 1. This +parameter defines the number of best solutions in generation **X** to +keep in the next generation **X+1**. The best solutions are just copied +from generation **X** to generation **X+1** without making any change. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=1, + ...) + +The best solutions are copied at the beginning of the population. If +``keep_elitism=1``, this means the best solution in generation X is kept +in the next generation X+1 at index 0 of the population. If +``keep_elitism=2``, this means the 2 best solutions in generation X are +kept in the next generation X+1 at indices 0 and 1 of the population of +generation X+1. + +Because the fitness values of these best solutions are already calculated in +generation X, then their fitness values will not be recalculated at +generation X+1 (i.e. the fitness function will not be called for these +solutions again). Instead, their fitness values are just reused. This is +why you see that no solution with index 0 is passed to the fitness +function. + +To force calling the fitness function for each solution in every +generation, consider setting ``keep_elitism`` and ``keep_parents`` to 0. +Moreover, keep the 2 parameters ``save_solutions`` and +``save_best_solutions`` to their default value ``False``. + +.. code:: python + + ga_instance = pygad.GA(..., + keep_elitism=0, + keep_parents=0, + save_solutions=False, + save_best_solutions=False, + ...) + Batch Fitness Calculation ========================= diff --git a/pygad/__init__.py b/pygad/__init__.py index 23292431..9df8e5cc 100644 --- a/pygad/__init__.py +++ b/pygad/__init__.py @@ -1,3 +1,3 @@ from .pygad import * # Relative import.
-__version__ = "3.1.0" +__version__ = "3.1.1" diff --git a/pygad/pygad.py b/pygad/pygad.py index 01ace61f..d9a7564e 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -345,7 +345,11 @@ def __init__(self, elif len(gene_type) == 2 and gene_type[0] in GA.supported_float_types and (type(gene_type[1]) in GA.supported_int_types or gene_type[1] is None): self.gene_type = gene_type self.gene_type_single = True - # A single data type of int with precision. + # A single data type of integer with precision None ([int, None]). + elif len(gene_type) == 2 and gene_type[0] in GA.supported_int_types and gene_type[1] is None: + self.gene_type = gene_type + self.gene_type_single = True + # Raise an exception for a single data type of int with integer precision. elif len(gene_type) == 2 and gene_type[0] in GA.supported_int_types and (type(gene_type[1]) in GA.supported_int_types or gene_type[1] is None): self.gene_type_single = False raise ValueError(f"Integers cannot have precision. Please use the integer data type directly instead of {gene_type}.") @@ -362,10 +366,8 @@ def __init__(self, self.valid_parameters = False raise ValueError(f"When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the value passed to the 'num_genes' parameter. Instead, value {gene_type} with len(gene_type) ({len(gene_type)}) != len(num_genes) ({num_genes}) found.") for gene_type_idx, gene_type_val in enumerate(gene_type): - if gene_type_val in GA.supported_float_types: - # If the gene type is float and no precision is passed, set it to None. - gene_type[gene_type_idx] = [gene_type_val, None] - elif gene_type_val in GA.supported_int_types: + if gene_type_val in GA.supported_int_float_types: + # If the gene type is float and no precision is passed or an integer, set its precision to None. gene_type[gene_type_idx] = [gene_type_val, None] elif type(gene_type_val) in [list, tuple, numpy.ndarray]: # A float type is expected in a list/tuple/numpy.ndarray of length 2. 
@@ -376,6 +378,12 @@ def __init__(self, else: self.valid_parameters = False raise TypeError(f"In the 'gene_type' parameter, the precision for float gene data types must be an integer but the element {gene_type_val} at index {gene_type_idx} has a precision of {gene_type_val[1]} with type {gene_type_val[0]}.") + elif gene_type_val[0] in GA.supported_int_types: + if gene_type_val[1] is None: + pass + else: + self.valid_parameters = False + raise TypeError(f"In the 'gene_type' parameter, either do not set a precision for integer data types or set it to None. But the element {gene_type_val} at index {gene_type_idx} has a precision of {gene_type_val[1]} with type {gene_type_val[0]}.") else: self.valid_parameters = False raise TypeError( @@ -1638,11 +1646,14 @@ def cal_pop_fitness(self): # The functions numpy.any()/numpy.all()/numpy.where()/numpy.equal() are very slow. # So, list membership operator 'in' is used to check if the solution exists in the 'self.solutions' list. # Make sure that both the solution and 'self.solutions' are of type 'list' not 'numpy.ndarray'. 
- # if (self.save_solutions) and (len(self.solutions) > 0) and (numpy.any(numpy.all(self.solutions == numpy.array(sol), axis=1))): - # if (self.save_solutions) and (len(self.solutions) > 0) and (numpy.any(numpy.all(numpy.equal(self.solutions, numpy.array(sol)), axis=1))): + # if (self.save_solutions) and (len(self.solutions) > 0) and (numpy.any(numpy.all(self.solutions == numpy.array(sol), axis=1))) + # if (self.save_solutions) and (len(self.solutions) > 0) and (numpy.any(numpy.all(numpy.equal(self.solutions, numpy.array(sol)), axis=1))) if (self.save_solutions) and (len(self.solutions) > 0) and (list(sol) in self.solutions): solution_idx = self.solutions.index(list(sol)) fitness = self.solutions_fitness[solution_idx] + elif (self.save_best_solutions) and (len(self.best_solutions) > 0) and (list(sol) in self.best_solutions): + solution_idx = self.best_solutions.index(list(sol)) + fitness = self.best_solutions_fitness[solution_idx] elif (self.keep_elitism > 0) and (self.last_generation_elitism is not None) and (len(self.last_generation_elitism) > 0) and (list(sol) in last_generation_elitism_as_list): # Return the index of the elitism from the elitism array 'self.last_generation_elitism'. # This is not its index within the population. It is just its index in the 'self.last_generation_elitism' array. @@ -1851,7 +1862,7 @@ def run(self): # Appending the best solution in the initial population to the best_solutions list. if self.save_best_solutions: - self.best_solutions.append(best_solution) + self.best_solutions.append(list(best_solution)) for generation in range(generation_first_idx, generation_last_idx): if not (self.on_fitness is None): @@ -2077,7 +2088,7 @@ def run(self): # Appending the best solution in the current generation to the best_solutions list. 
if self.save_best_solutions: - self.best_solutions.append(best_solution) + self.best_solutions.append(list(best_solution)) # If the on_generation attribute is not None, then cal the callback function after the generation. if not (self.on_generation is None): diff --git a/pyproject.toml b/pyproject.toml index aa8a6618..0f9ca5ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" [project] name = "pygad" -version = "3.1.0" +version = "3.1.1" description = "PyGAD: A Python Library for Building the Genetic Algorithm and Training Machine Learning Algoithms (Keras & PyTorch)." readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3" diff --git a/setup.py b/setup.py index 40a8b787..85af7bf7 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pygad", - version="3.1.0", + version="3.1.1", author="Ahmed Fawzy Gad", install_requires=["numpy", "matplotlib", "cloudpickle",], author_email="ahmed.f.gad@gmail.com",