swap-nas/preprocess_aircraft.py

54 lines
2.0 KiB
Python

import os
import shutil
# Dataset locations.
# Everything lives under one FGVC-Aircraft "data" directory, so relocating the
# dataset only requires changing data_root.
data_root = '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data'
dataset_path = os.path.join(data_root, 'images')
test_output_path = os.path.join(data_root, 'test_sorted_images')
train_output_path = os.path.join(data_root, 'train_sorted_images')

# Label files (the dataset also ships e.g. 'images_variant_trainval.txt').
# Two are read here: the test split and the train split.
test_labels_file = os.path.join(data_root, 'images_variant_test.txt')
train_labels_file = os.path.join(data_root, 'images_variant_train.txt')

# Create the output folders.
# exist_ok=True replaces the check-then-create pattern: it is not race-prone,
# and it fails immediately if the path exists but is not a directory.
for output_dir in (test_output_path, train_output_path):
    os.makedirs(output_dir, exist_ok=True)

# Read the label files; each list keeps one raw text line per entry.
with open(test_labels_file, 'r') as f:
    test_lines = f.readlines()
with open(train_labels_file, 'r') as f:
    train_lines = f.readlines()
def sort_images(lines, output_path, source_path=None):
    """Move labelled images into one sub-folder per category.

    Each entry of ``lines`` is read as ``"<image id> <label>"``: the text
    before the first space is the image id (``.jpg`` is appended to get the
    file name) and everything after it is the label.  Spaces and ``/`` in the
    label are replaced by ``_`` to form the category folder name.

    Args:
        lines: Sized sequence of label-file lines (``len()`` is used for the
            progress display).
        output_path: Directory under which the category folders are created.
        source_path: Directory holding the unsorted images.  When ``None``
            (the default) the module-level ``dataset_path`` is used.

    Returns:
        None.  Progress and problems are reported on stdout.

    Notes:
        Images are *moved*, not copied, so running the function again on the
        same lines reports every already-moved image as not found.
        Blank lines are ignored, and lines without a label are skipped with a
        message instead of being moved straight into ``output_path``.
    """
    if source_path is None:
        # Fall back to the image directory configured at module level.
        source_path = dataset_path

    total = len(lines)
    for count, line in enumerate(lines, start=1):
        # '\r' keeps the progress counter on a single console line.
        print(f'Processing image {count}/{total}', end='\r')

        entry = line.strip()
        image_id, _, label = entry.partition(' ')
        if not image_id:
            # Blank line: nothing to look up.
            continue
        if not label:
            # Without a label the category would be empty and the image would
            # land in output_path itself rather than in a category folder.
            print(f'Skipping line without a category: {entry!r}')
            continue

        image_name = image_id + '.jpg'
        # Spaces and '/' cannot be used verbatim in a single folder name.
        category = label.replace(' ', '_').replace('/', '_')

        # Create the category folder (also creates output_path if needed).
        category_path = os.path.join(output_path, category)
        os.makedirs(category_path, exist_ok=True)

        # Move the image into its category folder.
        src = os.path.join(source_path, image_name)
        dst = os.path.join(category_path, image_name)
        if os.path.exists(src):
            shutil.move(src, dst)
        else:
            print(f'Image {image_name} not found!')
# Sort each split in turn: announce it, then move its images into
# per-category folders under that split's output directory.
for banner, split_lines, split_output in (
    ("Sorting test images into folders by category...", test_lines, test_output_path),
    ("Sorting train images into folders by category...", train_lines, train_output_path),
):
    print(banner)
    sort_images(split_lines, split_output)

print("Images have been sorted into folders by category.")