外排序介绍
外排序是指能够处理极大量数据的排序算法。通常来说,外排序处理的数据不能一次装入内存,只能放在读写较慢的外存储器(通常是硬盘)上。外排序通常采用的是⼀种“排序-归并”的策略。在排序阶段,先读入能放在内存中的数据量,将其排序输出到⼀个临时文件,依此进行,将待排序数据组织为多个有序的临时文件。然后在归并阶段将这些临时文件组合为⼀个大的有序文件,也即排序结果。
跟外排序对应的就是内排序,之前常见的排序,都是内排序,这些排序思想适应的是数据在内存中,支持随机访问。归并排序的思想不需要随机访问数据,只需要依次按序列读取数据,所以归并排序既是⼀个内排序,也是⼀个外排序。
创建随机数据文件的代码
void CreatData()
{int n = 100000;srand((unsigned int)time(NULL));const char* file = "data.txt";FILE* fin = fopen(file, "w");if (fin == NULL){perror("foprn fail!");return;}for (int i = 0; i < n; i++){int x = rand() % n + 1;fprintf(fin, "%d\n", x);}fclose(fin);
}
文件归并排序思路分析
- 读取n个值排序后写到
file1
,再读取n个值排序后写到file2
file1
和file2
利⽤归并排序的思想,依次读取比较,取小的尾插到mfile
,mfile
归并为⼀个有序文件- 将
file1
和file2
删掉,mfile
重命名为file1
- 再次读取n个数据排序后写到
file2
- 继续⾛
file1
和file2
归并,重复步骤2,直到文件中无法读出数据。最后归并出的有序数据放到了file1
中
文件归并排序代码实现
- 创建随机数据文件的代码
#include<stdio.h>
#include<stdlib.h>
#include<time.h>void CreatData()
{int n = 100000;srand((unsigned int)time(NULL));const char* file = "data.txt";FILE* fin = fopen(file, "w");if (fin == NULL){perror("foprn fail!");return;}for (int i = 0; i < n; i++){int x = rand()+i;fprintf(fin, "%d\n", x);}fclose(fin);
}
- 取n个数据放入
file1
中,并进行排序
int compare(const void* a, const void* b)
{return (*(int*)a - *(int*)b);
}int ReadNDataSortToFile(FILE* fout, int n, const char* file1)
{int x = 0;int* a = (int*)malloc(sizeof(int) * n);if (a == NULL){perror("malloc fail!");return 0;}int j = 0;for (int i = 0; i < n; i++){if (fscanf(fout, "%d", &x) == EOF)break;a[j++] = x;}if (j == 0){free(a);return 0;}//排序qsort(a, j, sizeof(int), compare);//写入file1FILE* fin = fopen(file1, "w");if (fin == NULL){free(a);perror("fopen fail!");return 0;}for (int i = 0; i < j; i++){fprintf(fin, "%d\n", a[i]);}free(a);fclose(fin);return j;
}
- 将
file1
和file2
合并成mfile
void MergeFile(const char* file1, const char* file2, const char* mfile)
{FILE* fout1 = fopen(file1, "r");if (fout1 == NULL){perror("fopen fail!");return;}FILE* fout2 = fopen(file2, "r");if (fout1 == NULL){perror("fopen fail!");return;}FILE* fin = fopen(mfile, "w");if (fin == NULL){perror("fopen fail!");return;}int x1 = 0, x2 = 0;int ret1 = fscanf(fout1,"%d",&x1);int ret2= fscanf(fout2, "%d", &x2);while (ret1 != EOF && ret2 != EOF){if (x1 < x2){fprintf(fin,"%d\n",x1);ret1 = fscanf(fout1, "%d", &x1);}else{fprintf(fin, "%d\n", x2);ret2 = fscanf(fout2, "%d", &x2);}}while (ret1 != EOF){fprintf(fin, "%d\n", x1);ret1 = fscanf(fout1, "%d", &x1);}while (ret2 != EOF){fprintf(fin, "%d\n", x2);ret2 = fscanf(fout2, "%d", &x2);}fclose(fout1);fclose(fout2);fclose(fin);
}
- 文件归并的主思路
int main()
{CreatData();const char* file1 = "file1.txt";const char* file2 = "file2.txt";const char* mfile = "mfile.txt";FILE* fout = fopen("data.txt", "r");if (fout == NULL){perror("fopen fail!");return 0;}int m = 1000;ReadNDataSortToFile(fout, m, file1);ReadNDataSortToFile(fout, m, file2);while (1){MergeFile(file1, file2, mfile);//删除file1和file2remove(file1);remove(file2);//将mfile改名为file1rename(mfile,file1);//若读不到数据则退出if (ReadNDataSortToFile(fout, m, file2) == 0)break;}return 0;
}