Auto Byte

Science AI

# 基于Python的遗传算法特征约简（附代码）

#### 更多关于遗传算法的信息

1. 遗传算法优化介绍

https://www.kdnuggets.com/2018/03/introduction-optimization-with-genetic-algorithm.html

https://towardsdatascience.com/introduction-to-optimization-with-genetic-algorithm-2f5001d9964b

2. 遗传算法优化-逐步示例

3. python中的遗传算法实现

https://www.kdnuggets.com/2018/07/genetic-algorithm-implementation-python.html

https://towardsdatascience.com/genetic-algorithm-implementation-in-python-5ab67bb124a6

Springer链接：

https://www.springer.com/us/book/9781484241660

#### 利用Python实现

import numpy
import GA
import pickle
import matplotlib.pyplot

# Load the preprocessed dataset.
# FIX: the original listing opened and closed both files without reading
# them, leaving data_inputs/data_outputs undefined (NameError below).
# pickle.load restores the missing reads; `with` closes the files safely.
with open("dataset_features.pkl", "rb") as f:
    data_inputs = pickle.load(f)

with open("outputs.pkl", "rb") as f:
    data_outputs = pickle.load(f)

num_samples = data_inputs.shape[0]
num_feature_elements = data_inputs.shape[1]

# Every 4th sample starting at offset 1 trains; every 4th starting at
# offset 0 tests. NOTE(review): offsets 2 and 3 are never used —
# presumably deliberate subsampling from the tutorial; confirm if full
# data coverage is wanted.
train_indices = numpy.arange(1, num_samples, 4)
test_indices = numpy.arange(0, num_samples, 4)
print("Number of training samples: ", train_indices.shape[0])
print("Number of test samples: ", test_indices.shape[0])

"""
Genetic algorithm parameters:
    Population size
    Mating pool size
    Number of mutations
"""
sol_per_pop = 8  # Population size.
num_parents_mating = 4  # Number of parents inside the mating pool.
num_mutations = 3  # Number of elements to mutate.

# Defining the population shape.
pop_shape = (sol_per_pop, num_feature_elements)

# Creating the initial population: each gene is 0/1
# (feature excluded/included).
new_population = numpy.random.randint(low=0, high=2, size=pop_shape)
print(new_population.shape)

best_outputs = []
num_generations = 100
for generation in range(num_generations):
    print("Generation : ", generation)

    # Measuring the fitness of each chromosome in the population.
    fitness = GA.cal_pop_fitness(new_population, data_inputs, data_outputs, train_indices, test_indices)
    best_outputs.append(numpy.max(fitness))
    # The best result in the current iteration.
    print("Best result : ", best_outputs[-1])

    # Selecting the best parents in the population for mating.
    parents = GA.select_mating_pool(new_population, fitness, num_parents_mating)

    # Generating next generation using crossover.
    offspring_crossover = GA.crossover(parents, offspring_size=(pop_shape[0] - parents.shape[0], num_feature_elements))

    # Adding some variations to the offspring using mutation.
    offspring_mutation = GA.mutation(offspring_crossover, num_mutations=num_mutations)

    # Creating the new population based on the parents and offspring.
    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_mutation

# Getting the best solution after all generations finish.
# At first, the fitness is calculated for each solution in the final generation.
fitness = GA.cal_pop_fitness(new_population, data_inputs, data_outputs, train_indices, test_indices)
# Then return the index of that solution corresponding to the best fitness.
best_match_idx = numpy.where(fitness == numpy.max(fitness))[0]
best_match_idx = best_match_idx[0]

best_solution = new_population[best_match_idx, :]
best_solution_indices = numpy.where(best_solution == 1)[0]
best_solution_num_elements = best_solution_indices.shape[0]
best_solution_fitness = fitness[best_match_idx]

print("best_match_idx : ", best_match_idx)
print("best_solution : ", best_solution)
print("Selected indices : ", best_solution_indices)
print("Number of selected elements : ", best_solution_num_elements)
print("Best solution fitness : ", best_solution_fitness)

matplotlib.pyplot.plot(best_outputs)
matplotlib.pyplot.xlabel("Iteration")
matplotlib.pyplot.ylabel("Fitness")
matplotlib.pyplot.show()

import numpy
import GA
import pickle
import matplotlib.pyplot

# Load the preprocessed dataset.
# FIX: the original listing opened and closed both files without reading
# them, leaving data_inputs/data_outputs undefined (NameError below).
# pickle.load restores the missing reads; `with` closes the files safely.
with open("dataset_features.pkl", "rb") as f:
    data_inputs = pickle.load(f)

with open("outputs.pkl", "rb") as f:
    data_outputs = pickle.load(f)

num_samples = data_inputs.shape[0]
num_feature_elements = data_inputs.shape[1]

# Every 4th sample starting at offset 1 trains; every 4th starting at
# offset 0 tests. NOTE(review): offsets 2 and 3 are never used —
# presumably deliberate subsampling from the tutorial; confirm if full
# data coverage is wanted.
train_indices = numpy.arange(1, num_samples, 4)
test_indices = numpy.arange(0, num_samples, 4)
print("Number of training samples: ", train_indices.shape[0])
print("Number of test samples: ", test_indices.shape[0])

"""
Genetic algorithm parameters:
    Population size
    Mating pool size
    Number of mutations
"""
sol_per_pop = 8  # Population size.
num_parents_mating = 4  # Number of parents inside the mating pool.
num_mutations = 3  # Number of elements to mutate.

# Defining the population shape.
pop_shape = (sol_per_pop, num_feature_elements)

# Creating the initial population: each gene is 0/1
# (feature excluded/included).
new_population = numpy.random.randint(low=0, high=2, size=pop_shape)
print(new_population.shape)

best_outputs = []
num_generations = 100
for generation in range(num_generations):
    print("Generation : ", generation)

    # Measuring the fitness of each chromosome in the population.
    fitness = GA.cal_pop_fitness(new_population, data_inputs, data_outputs, train_indices, test_indices)
    best_outputs.append(numpy.max(fitness))
    # The best result in the current iteration.
    print("Best result : ", best_outputs[-1])

    # Selecting the best parents in the population for mating.
    parents = GA.select_mating_pool(new_population, fitness, num_parents_mating)

    # Generating next generation using crossover.
    offspring_crossover = GA.crossover(parents, offspring_size=(pop_shape[0] - parents.shape[0], num_feature_elements))

    # Adding some variations to the offspring using mutation.
    offspring_mutation = GA.mutation(offspring_crossover, num_mutations=num_mutations)

    # Creating the new population based on the parents and offspring.
    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_mutation

# Getting the best solution after iterating finishing all generations.
# At first, the fitness is calculated for each solution in the final generation.
fitness = GA.cal_pop_fitness(new_population, data_inputs, data_outputs, train_indices, test_indices)
# Then return the index of that solution corresponding to the best fitness.
best_match_idx = numpy.where(fitness == numpy.max(fitness))[0]
best_match_idx = best_match_idx[0]

best_solution = new_population[best_match_idx, :]
best_solution_indices = numpy.where(best_solution == 1)[0]
best_solution_num_elements = best_solution_indices.shape[0]
best_solution_fitness = fitness[best_match_idx]

print("best_match_idx : ", best_match_idx)
print("best_solution : ", best_solution)
print("Selected indices : ", best_solution_indices)
print("Number of selected elements : ", best_solution_num_elements)
print("Best solution fitness : ", best_solution_fitness)

matplotlib.pyplot.plot(best_outputs)
matplotlib.pyplot.xlabel("Iteration")
matplotlib.pyplot.ylabel("Fitness")
matplotlib.pyplot.show()

#### GA.py的实现

GA.py文件的实现如下所示。在cal_pop_fitness()函数中，SVC根据每个解决方案所选择的特征元素进行训练。训练之前，先按所选基因值为1的元素过滤特征，这一步在reduce_features()函数中完成。该函数除了接收所有样本的完整特征外，还接收当前的解决方案。

crossover（）和mutation（）函数的实现与我之前的教程“Python中的遗传算法实现”中讨论的非常相似。一个主要的区别是，mutation（）函数通过翻转随机选择的基因的值来改变它们，因为我们使用的是二进制表示。

import numpy

import sklearn.svm

def reduce_features(solution, features):
    """Keep only the feature columns whose gene in `solution` equals 1.

    `solution` is a binary chromosome (one gene per feature column);
    `features` is a (samples, features) array. Returns the filtered
    (samples, selected_features) view.
    """
    kept_columns = numpy.where(solution == 1)[0]
    return features[:, kept_columns]

def classification_accuracy(labels, predictions):
    """Return the fraction of `predictions` that match `labels`.

    Both arguments are equal-length 1-D arrays; the result is a float
    in [0, 1].
    """
    num_correct = numpy.where(labels == predictions)[0].shape[0]
    return num_correct / labels.shape[0]

def cal_pop_fitness(pop, features, labels, train_indices, test_indices):
    """Fitness of every chromosome in `pop`: test-set accuracy of an SVC.

    For each solution, the features it selects (gene == 1) are kept, a
    fresh SVC is fit on the training split, and its accuracy on the test
    split becomes that solution's fitness. Returns a 1-D float array of
    length pop.shape[0].
    """
    accuracies = numpy.zeros(pop.shape[0])
    for solution_idx, curr_solution in enumerate(pop):
        # Keep only the feature columns this chromosome selects.
        reduced_features = reduce_features(curr_solution, features)
        train_data = reduced_features[train_indices, :]
        test_data = reduced_features[test_indices, :]
        train_labels = labels[train_indices]
        test_labels = labels[test_indices]

        # Train a fresh SVM on the reduced training split ...
        classifier = sklearn.svm.SVC(gamma='scale')
        classifier.fit(X=train_data, y=train_labels)
        # ... and score it on the held-out split.
        predictions = classifier.predict(test_data)
        accuracies[solution_idx] = classification_accuracy(test_labels, predictions)
    return accuracies

def select_mating_pool(pop, fitness, num_parents):
    """Select the `num_parents` fittest chromosomes from `pop`.

    Greedy selection: repeatedly take the solution with the highest
    remaining fitness. FIX: the original overwrote entries of the
    caller's `fitness` array with a magic sentinel (-99999999999);
    this version works on a private copy, leaving the caller's array
    untouched, and uses -inf so no real fitness can beat an excluded one.

    Returns a (num_parents, pop.shape[1]) float array of parents.
    """
    remaining = numpy.array(fitness, dtype=float, copy=True)
    parents = numpy.empty((num_parents, pop.shape[1]))
    for parent_num in range(num_parents):
        # Index of the current best remaining solution (first max on ties,
        # matching the original numpy.where(...)[0][0] behavior).
        max_fitness_idx = numpy.argmax(remaining)
        parents[parent_num, :] = pop[max_fitness_idx, :]
        # Exclude this solution from subsequent picks.
        remaining[max_fitness_idx] = -numpy.inf
    return parents

def crossover(parents, offspring_size):
    """Single-point crossover at the chromosome midpoint.

    Offspring k takes the first half of its genes from parent
    k % num_parents and the second half from parent (k+1) % num_parents.
    Returns a float array of shape `offspring_size`.
    """
    offspring = numpy.empty(offspring_size)
    # The point at which crossover takes place — the chromosome center.
    split = numpy.uint8(offspring_size[1] / 2)
    num_parents = parents.shape[0]
    for child in range(offspring_size[0]):
        first = child % num_parents         # index of the first parent to mate
        second = (child + 1) % num_parents  # index of the second parent to mate
        # First half of the genes from the first parent ...
        offspring[child, 0:split] = parents[first, 0:split]
        # ... second half from the second parent.
        offspring[child, split:] = parents[second, split:]
    return offspring

def mutation(offspring_crossover, num_mutations=2):
    """Flip `num_mutations` randomly chosen genes in each offspring.

    Because the representation is binary, mutation flips a gene with
    1 - gene. NOTE: the column indices are drawn once and the SAME
    positions are flipped in every offspring (duplicate draws cancel
    out), matching the original implementation. Mutates and returns
    `offspring_crossover` in place.
    """
    flip_columns = numpy.random.randint(low=0, high=offspring_crossover.shape[1], size=num_mutations)
    for row in range(offspring_crossover.shape[0]):
        offspring_crossover[row, flip_columns] = 1 - offspring_crossover[row, flip_columns]
    return offspring_crossover

Feature Reduction using Genetic Algorithm with Python

https://www.kdnuggets.com/2019/03/feature-reduction-genetic-algorithm-python.html

THU数据派

THU数据派"基于清华，放眼世界"，以扎实的理工功底闯荡“数据江湖”。发布全球大数据资讯，定期组织线下活动，分享前沿产业动态。了解清华大数据，敬请关注姐妹号“数据派THU”。

（人工）神经网络是一种起源于 20 世纪 50 年代的监督式机器学习模型，那时候研究者构想了「感知器（perceptron）」的想法。这一领域的研究者通常被称为「联结主义者（Connectionist）」，因为这种模型模拟了人脑的功能。神经网络模型通常是通过反向传播算法应用梯度下降训练的。目前神经网络有两大主要类型，它们都是前馈神经网络：卷积神经网络（CNN）和循环神经网络（RNN），其中 RNN 又包含长短期记忆（LSTM）、门控循环单元（GRU）等等。深度学习是一种主要应用于神经网络帮助其取得更好结果的技术。尽管神经网络主要用于监督学习，但也有一些为无监督学习设计的变体，比如自动编码器和生成对抗网络（GAN）。

360公司成立于2005年8月，创始人周鸿祎 2011年3月30日在纽交所成功上市 2018年2月28日，回归A股上市，上证交易所（601360） 是中国第一大互联网安全公司，用户6.5亿，市场渗透率94.7% 中国第一大移动互联网安全公司，用户数超过8.5亿 中国领先的AIoT公司，将人工智能技术应用于智能生活、家庭安防、出行安全、儿童安全等多个领域

http://smart.360.cn/cleanrobot/