大文件分割,合并------C++ ------fstream

2023-12-13 04:51:09

将一个大文件(这里测试文件为5.2G)切分为指定大小的多个小文件,然后再把分割后的文件按顺序拼接,合并还原为分割前的源文件

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>

#include <boost/timer.hpp> // timing helper (boost::timer)
// Suffix of the directory that receives the chunks: <source stem>_chunk
#define FILE_SUFFIX "_chunk"
// printf-style pattern of each chunk file name: <source stem>_chunk_<index>.bin
#define CHUNK_NAME "%s_chunk_%d.bin"

/// <summary>
/// Formats the file name of one chunk from CHUNK_NAME without a fixed-size buffer.
/// </summary>
/// <param name="stem">Source file name without its extension.</param>
/// <param name="index">Zero-based chunk index.</param>
/// <returns>The chunk file name, or an empty string if formatting failed.</returns>
static std::string makeChunkName(const std::string& stem, size_t index)
{
    const int idx = static_cast<int>(index);
    // First pass only measures, so arbitrarily long stems cannot overflow a buffer.
    const int len = std::snprintf(nullptr, 0, CHUNK_NAME, stem.c_str(), idx);
    if (len < 0)
    {
        return std::string();
    }
    std::string name(static_cast<size_t>(len) + 1, '\0');
    std::snprintf(&name[0], name.size(), CHUNK_NAME, stem.c_str(), idx);
    name.resize(static_cast<size_t>(len));  // drop the terminating NUL
    return name;
}

/// <summary>
/// Splits a file into consecutive chunks of at most chunkSize bytes.
/// The chunks are written to a directory named "&lt;stem&gt;_chunk" located next to the
/// input file; the directory is created if it does not exist.
/// </summary>
/// <param name="inputFile">Path of the file to split.</param>
/// <param name="chunkSize">Maximum size of each chunk in bytes (e.g. 500 * 1024 * 1024 for 500 MB). Must be greater than zero.</param>
/// <param name="inputFiles">Receives, appended in order, the path of every chunk that was written completely.</param>
/// <returns>true if the whole file was split (an empty file yields zero chunks); false on invalid arguments or any I/O failure.</returns>
bool splitFile(const std::string& inputFile, size_t chunkSize, std::vector<std::string>& inputFiles)
{
    if (chunkSize == 0)
    {
        // A zero chunk size would otherwise divide by zero / never make progress.
        std::cout << "Error: chunk size must be greater than zero" << std::endl;
        return false;
    }

    // Open the input first so a missing file does not leave an empty chunk directory behind.
    std::ifstream inputFileStream(inputFile, std::ios::binary | std::ios::ate);
    if (!inputFileStream.is_open())
    {
        std::cout << "Error opening file: " << inputFile << std::endl;
        return false;
    }

    // The stream was opened at the end, so the current position is the file size.
    const std::streamoff endPos = static_cast<std::streamoff>(inputFileStream.tellg());
    if (endPos < 0)
    {
        std::cout << "Error reading size of file: " << inputFile << std::endl;
        return false;
    }
    // uintmax_t keeps offsets correct for files larger than 4 GiB even where size_t is 32 bits.
    const std::uintmax_t fileSize = static_cast<std::uintmax_t>(endPos);
    inputFileStream.seekg(0, std::ios::beg);

    const std::filesystem::path inputPath(inputFile);
    const std::string inputFileName = inputPath.filename().stem().string();
    const std::filesystem::path chunkDir = inputPath.parent_path() / (inputFileName + FILE_SUFFIX);

    std::error_code ec;
    std::filesystem::create_directories(chunkDir, ec);
    if (ec)
    {
        std::cout << "Error creating chunk directory: " << chunkDir.string() << std::endl;
        return false;
    }

    // One bounded buffer is reused for every chunk instead of allocating chunkSize bytes each time.
    constexpr std::uintmax_t kMaxBufferBytes = 1024 * 1024;
    std::vector<char> buffer(static_cast<size_t>(
        std::min<std::uintmax_t>({ static_cast<std::uintmax_t>(chunkSize), kMaxBufferBytes, fileSize })));

    std::uintmax_t offset = 0;
    for (size_t i = 0; offset < fileSize; ++i)
    {
        // The last chunk may be shorter than chunkSize.
        const std::uintmax_t chunkBytes = std::min<std::uintmax_t>(chunkSize, fileSize - offset);

        const std::string chunkLeaf = makeChunkName(inputFileName, i);
        if (chunkLeaf.empty())
        {
            std::cout << "Error building chunk file name for: " << inputFile << std::endl;
            return false;
        }
        const std::string chunkFileName = (chunkDir / chunkLeaf).string();

        std::ofstream chunkFile(chunkFileName, std::ios::binary);
        if (!chunkFile.is_open())
        {
            std::cout << "Error creating chunk file: " << chunkFileName << std::endl;
            return false;
        }

        // The input is consumed sequentially, so no per-chunk seek is required.
        std::uintmax_t remaining = chunkBytes;
        while (remaining > 0)
        {
            const std::streamsize want = static_cast<std::streamsize>(
                std::min<std::uintmax_t>(remaining, buffer.size()));
            inputFileStream.read(buffer.data(), want);
            if (inputFileStream.gcount() != want)
            {
                std::cout << "Error reading file: " << inputFile << std::endl;
                return false;
            }
            chunkFile.write(buffer.data(), want);
            if (!chunkFile)
            {
                std::cout << "Error writing chunk file: " << chunkFileName << std::endl;
                return false;
            }
            remaining -= static_cast<std::uintmax_t>(want);
        }

        // close() flushes; a failure here means the chunk is incomplete on disk.
        chunkFile.close();
        if (!chunkFile)
        {
            std::cout << "Error writing chunk file: " << chunkFileName << std::endl;
            return false;
        }

        inputFiles.push_back(chunkFileName);
        std::cout << "save split: " << chunkFileName << std::endl;
        offset += chunkBytes;
    }

    return true;
}

/// <summary>
/// Concatenates several files, in the given order, into a single output file.
/// </summary>
/// <param name="outputFileName">Path of the merged file; it is created, or truncated if it already exists.</param>
/// <param name="inputFiles">Paths of the files to append, in order. Empty files are allowed and contribute nothing.</param>
/// <returns>true if every input was opened and fully copied; false on the first open, read or write failure (the output file may then be incomplete).</returns>
bool mergeFiles(const std::string& outputFileName, const std::vector<std::string>& inputFiles)
{
    std::ofstream outputFile(outputFileName, std::ios::binary);
    if (!outputFile.is_open())
    {
        std::cout << "Error creating output file: " << outputFileName << std::endl;
        return false;
    }

    for (const auto& inputFile : inputFiles)
    {
        std::ifstream inputFileStream(inputFile, std::ios::binary);
        if (!inputFileStream.is_open())
        {
            std::cout << "Error opening input file: " << inputFile << std::endl;
            return false;
        }

        const auto firstChar = inputFileStream.peek();
        if (inputFileStream.bad())
        {
            std::cout << "Error reading input file: " << inputFile << std::endl;
            return false;
        }

        // operator<<(streambuf*) sets failbit on the destination when it inserts zero
        // characters, which would silently discard every later chunk. Skip empty inputs.
        if (firstChar != std::ifstream::traits_type::eof())
        {
            outputFile << inputFileStream.rdbuf();
        }

        if (!outputFile)
        {
            std::cout << "Error writing output file: " << outputFileName << std::endl;
            return false;
        }
        std::cout << "merge: " << inputFile << std::endl;
    }

    // close() flushes buffered data; report a failure instead of claiming success.
    outputFile.close();
    if (!outputFile)
    {
        std::cout << "Error writing output file: " << outputFileName << std::endl;
        return false;
    }
    return true;
}


/// <summary>
/// Demo driver: splits ./split_file.rar into chunks of at most 500 MB, merges the chunks
/// back into ./merged_file.rar, and prints the elapsed seconds of each phase.
/// </summary>
/// <returns>0 when both phases succeed, 1 as soon as one of them fails.</returns>
int main() {
    const std::string inputFile = "./split_file.rar";   // the original test used a 5.2 GB file
    const std::string mergedFileName = "./merged_file.rar";
    const size_t chunkSize = static_cast<size_t>(500) * 1024 * 1024;  // 500 MB

    // steady_clock measures wall-clock time, which is what matters for I/O-bound work.
    using Clock = std::chrono::steady_clock;
    const auto secondsSince = [](Clock::time_point start) {
        return std::chrono::duration<double>(Clock::now() - start).count();
    };

    std::vector<std::string> inputFiles;

    Clock::time_point phaseStart = Clock::now();
    const bool sp = splitFile(inputFile, chunkSize, inputFiles);
    std::cout << (sp ? "splitFile successed" : "splitFile failed") << std::endl;
    std::cout << secondsSince(phaseStart) << std::endl;  // seconds
    if (!sp)
    {
        // Merging an incomplete chunk list would produce a corrupt file; stop here.
        return 1;
    }

    phaseStart = Clock::now();
    const bool me = mergeFiles(mergedFileName, inputFiles);
    std::cout << (me ? "mergeFiles successed" : "mergeFiles failed") << std::endl;
    std::cout << secondsSince(phaseStart) << std::endl;  // seconds
    return me ? 0 : 1;
}

文章来源:https://blog.csdn.net/mankeywang/article/details/134870404
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。