# Sort FGVC-Aircraft images into per-category folders for the train and test splits.
import os
import shutil

# Dataset locations. Everything lives under one data root, so relocating the
# dataset only requires changing ``data_root``.
data_root = '/mnt/Study/DataSet/DataSet/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data'

# Folder holding the original, unsorted images.
dataset_path = f'{data_root}/images'
# Destination folders for the sorted test and train images.
test_output_path = f'{data_root}/test_sorted_images'
train_output_path = f'{data_root}/train_sorted_images'

# Label (category) files. Two are used here: one listing the test split and
# one listing the train split.
test_labels_file = f'{data_root}/images_variant_test.txt'
train_labels_file = f'{data_root}/images_variant_train.txt'

# Create the output folders. ``exist_ok=True`` makes this a no-op when a
# folder already exists and avoids the check-then-create race of testing
# ``os.path.exists`` first.
for output_dir in (test_output_path, train_output_path):
    os.makedirs(output_dir, exist_ok=True)

# Load the label files, keeping every line (including its line ending) of
# each split in a list.
with open(test_labels_file, 'r') as labels_fh:
    test_lines = list(labels_fh)

with open(train_labels_file, 'r') as labels_fh:
    train_lines = list(labels_fh)


def sort_images(lines, output_path, dataset_dir=None):
    """Move each listed image into a per-category subfolder of ``output_path``.

    Every non-blank entry of ``lines`` is read as whitespace-separated
    fields: the first field is the image id (the file ``<id>.jpg`` is looked
    up in the source folder) and the remaining fields form the category
    name. The category fields are joined with ``_`` and any ``/`` is also
    replaced by ``_`` so the result is a single folder name.

    Args:
        lines: Sized sequence of label-file lines.
        output_path: Folder under which the category folders are created
            (missing parent folders are created as well).
        dataset_dir: Folder containing the source images. Defaults to the
            module-level ``dataset_path`` when omitted.

    Images are moved, not copied. An image that is missing from the source
    folder is reported on stdout and skipped; its category folder is still
    created.
    """
    if dataset_dir is None:
        dataset_dir = dataset_path

    # Blank lines (for example a trailing empty line in the label file) name
    # no image; dropping them up front avoids a bogus "Image .jpg not found!"
    # report and keeps the progress total accurate. ``split()`` without an
    # argument also tolerates tabs and repeated spaces between fields.
    entries = [line.split() for line in lines if line.strip()]
    total = len(entries)

    for count, (image_id, *category_words) in enumerate(entries, start=1):
        # '\r' rewinds to the start of the line so the counter updates in place.
        print(f'Processing image {count}/{total}', end='\r')

        image_name = image_id + '.jpg'
        # '/' would otherwise be taken as a path separator (e.g. "F/A-18").
        category = '_'.join(category_words).replace('/', '_')

        # Create the category folder; a no-op when it already exists.
        category_path = os.path.join(output_path, category)
        os.makedirs(category_path, exist_ok=True)

        # Move the image into its category folder.
        src = os.path.join(dataset_dir, image_name)
        dst = os.path.join(category_path, image_name)
        if os.path.exists(src):
            shutil.move(src, dst)
        else:
            print(f'Image {image_name} not found!')


# Run the sort for each split in turn (test first, then train), announcing
# each one before it starts.
sort_jobs = (
    ("Sorting test images into folders by category...", test_lines, test_output_path),
    ("Sorting train images into folders by category...", train_lines, train_output_path),
)
for banner, label_lines, target_dir in sort_jobs:
    print(banner)
    sort_images(label_lines, target_dir)

print("Images have been sorted into folders by category.")