基于 curl 使用 HTTP 多线程下载大文件

2023-12-13 23:50:24

一、获取文件大小

int64_t CHttpClient::GetFileSize(const std::string &url)
{
    auto curl = curl_easy_init();
    if (!curl)
    {
        curl_easy_cleanup(curl);
        return -1;
    }
    double filesize = -1; //文件大小

    curl_easy_setopt(curl,CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_HEADER, 1); 
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1); 
    CURLcode res_code = curl_easy_perform(curl); //请求
    if (res_code != CURLE_OK)
    {
        curl_easy_cleanup(curl);
        return -1;
    }
    
    curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &filesize); 
    curl_easy_cleanup(curl);

    std::cout << "获取文件大小完成:" << (int64_t)filesize  << std::endl;
    return filesize;
}

二、划分线程

    auto client = GetClient();
    auto length = client->GetFileSize(url);
//根据线程数划分块大小
    auto blockSize = length / thNum;
    auto lastBlock = (length % thNum == 0 ? blockSize : blockSize + length % thNum );
    
    //启动多个线程进行下载
    std::vector<std::future<bool>> vecFt;
    std::vector<std::shared_ptr<INetProtocol>> vecDownload;
    for (size_t i = 0; i < thNum; i++)
    {
        auto block = (i == thNum - 1 ? lastBlock : blockSize);
        auto dlClient = GetClient();
        vecDownload.push_back(dlClient);
        vecFt.emplace_back(std::async(std::bind(&INetProtocol::Download, dlClient, url, blockSize * i,  block)));
    }

三、下载区间数据

bool CHttpClient::Download(const std::string &url, int64_t offset, int64_t blocksize)
{
    auto curl = curl_easy_init();
    if (!curl)
    {
        curl_easy_cleanup(curl);
        return false;
    }
    auto rangeEnd = offset + blocksize - 1;
    std::cout << "开始下载数据,区间:" << offset << "-" <<  rangeEnd  << ",块大小:" << blocksize << std::endl;
	m_blockData = std::make_shared<BlockData>(offset, blocksize);
	curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_FTP_RESPONSE_TIMEOUT, 10);
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CHttpClient::WriteBlock);
    std::stringstream ss;
    ss << offset << "-" << rangeEnd;
	curl_easy_setopt(curl, CURLOPT_RANGE, ss.str().c_str());
	CURLcode res_code = curl_easy_perform(curl);
    if (res_code != CURLE_OK)
    {
        curl_easy_cleanup(curl);
        return false;
    }
	std::cout << "下载数据:" << offset << "-" << rangeEnd << ", 获取数据:" << m_blockData->readSize << "完成" << std::endl;
	curl_easy_cleanup(curl);
    return true;
}

四、等待所有线程完成

// Wait synchronously for every download task and record whether any failed.
    bool isFaild = false;
    // The index is size_t so it is compared with vecFt.size() without a
    // signed/unsigned mismatch.
    for (size_t i = 0; i < vecFt.size(); i++)
    {
        if (!vecFt[i].get())
        {
            isFaild = true;
            std::cout << "线程:" << i << "下载失败" << std::endl;
        }
    }

五、合并线程数据

// Concatenates the data held by each downloader into one local file.
//
// fileName:    path of the output file; it is opened in binary mode.
// vecDownload: downloaders whose streams are appended in vector order.
// Throws std::runtime_error when the file cannot be created or when writing
// to it fails.
void CDownload::MergerData(const std::string &fileName, std::vector<std::shared_ptr<INetProtocol>>& vecDownload)
{
    std::ofstream fout(fileName, std::ios::out | std::ios::binary);
    if (!fout)
    {
        throw std::runtime_error("create local file faild");
    }

    for (auto& item : vecDownload)
    {
        auto&& blockStream = item->GetStream();
        auto* src = blockStream.rdbuf();

        // Inserting a stream buffer that yields no characters sets failbit on
        // the output stream, which would silently discard every later block;
        // skip empty sources instead.
        if (!src || src->sgetc() == std::char_traits<char>::eof())
        {
            continue;
        }

        if (!(fout << src))
        {
            throw std::runtime_error("write local file failed");
        }
    }

    // Surface errors that only show up when buffered data reaches the file.
    fout.flush();
    if (!fout)
    {
        throw std::runtime_error("write local file failed");
    }
}

六、获取服务器文件MD5

std::string CHttpClient::GetContextMd5(const std::string &url)
{
    auto curl = curl_easy_init();
    if (!curl)
    {
        curl_easy_cleanup(curl);
        return std::string();
    }


    curl_easy_setopt(curl,CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_HEADER, 1); 
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1); 

    std::string strMd5;
    curl_easy_setopt(curl, CURLOPT_HEADERDATA, &strMd5);
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, CHttpClient::ReadHeader);
    CURLcode res_code = curl_easy_perform(curl); //请求
    if (res_code != CURLE_OK)
    {
        curl_easy_cleanup(curl);
        return std::string();
    }
    
    curl_easy_cleanup(curl);
    std::cout << "获取Md5完成:" << strMd5  << std::endl;
    return strMd5;
}

七、校验文件MD5是否一致

bool CDownload::CheckFile(const std::string &url, const std::string& strLocalFile)
{
    auto client = GetClient();
    auto contextMd5 = client->GetContextMd5(url);
    auto localMd5 = GetContentMd5(strLocalFile);
    std::cout << "context    md5:" << contextMd5 << std::endl;
    std::cout << "local file md5:" << localMd5  << std::endl;
    if (contextMd5 != localMd5)
    {
        std::cout << "md5 校验失败, 文件下载失败" << std::endl;
        return false;
    }
    std::cout << "md5 校验一致, 文件下载成功" << std::endl;
    return true;
}


//获取本地文件MD5,然后通过Base64编码输出
std::string CDownload::GetContentMd5(const std::string &fileName)
{
    std::ifstream fin(fileName, std::ios::in  | std::ios::binary);
    if (!fin)
    {
        throw std::runtime_error("获取文件MD5失败,文件打开失败");
    }

    EVP_MD_CTX  *mdctx;
    unsigned char *md5_digest;
    unsigned int md5_digest_len = EVP_MD_size(EVP_md5());

    mdctx = EVP_MD_CTX_new();
    EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);

    const int bufSize = 4 * 1024 * 1024;
    char *pszBuf = new char[bufSize];
    while (!fin.eof())
    {
        fin.read(pszBuf, bufSize);
        EVP_DigestUpdate(mdctx, pszBuf, fin.gcount());
    }
    md5_digest = (unsigned char *)OPENSSL_malloc(md5_digest_len);
    EVP_DigestFinal_ex(mdctx, md5_digest, &md5_digest_len);
    EVP_MD_CTX_free(mdctx);
    delete[] pszBuf;
    return CBase64::encode(std::string((char*)md5_digest, md5_digest_len));
}

如需完整代码,可评论区留言

文章来源:https://blog.csdn.net/fengqiao1999/article/details/134863537
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。