simdtutor icon indicating copy to clipboard operation
simdtutor copied to clipboard

unordered_map 优化问题

Open muyuuuu opened this issue 8 months ago • 0 comments

看你C++大典写了好多 unordered_map 的用法, 想问一下如何优化

#include <chrono>
#include <climits>
#include <iostream>
#include <mutex>
#include <random>
#include <thread>
#include <unordered_map>
#include <vector>

using namespace std;
using namespace std::chrono;

// 生成随机数据
vector<int> generate_random_data(size_t size) {
  vector<int> data(size);
  random_device rd;
  mt19937 gen(rd());
  uniform_int_distribution<int> dis(0, 1e6 / 10);

  for (size_t i = 0; i < size; ++i) {
    data[i] = dis(gen);
  }
  return data;
}

// 单线程版本
void single_thread_insert(const vector<int> &data,
                          unordered_map<int, size_t> &map) {
  for (size_t i = 0; i < data.size(); ++i) {
    if (!map.count(data[i])) {
      map[data[i]] = i;
    }
  }
}

// 多线程版本 - 每个线程处理一部分数据
void multi_thread_insert(const vector<int> &data,
                         unordered_map<int, size_t> &global_map,
                         int num_threads) {
  vector<thread> threads;
  size_t chunk_size = data.size() / num_threads;
  std::vector<unordered_map<int, size_t>> local_map;
  local_map.resize(num_threads);

  auto worker = [&](size_t start, size_t end, int t) {
    auto local = local_map[t];
    for (size_t i = start; i < end; ++i) {
      if (!local.count(data[i])) {
        local[data[i]] = i;
      }
    }

    // 合并到全局map ...
  };

  for (int t = 0; t < num_threads; ++t) {
    size_t start = t * chunk_size;
    size_t end = (t == num_threads - 1) ? data.size() : start + chunk_size;
    threads.emplace_back(worker, start, end, t);
  }

  for (auto &t : threads) {
    t.join();
  }
}

int main() {
  const size_t data_size = 1e6; // 1百万个元素
  const int num_threads =
      thread::hardware_concurrency(); // 使用硬件支持的线程数

  // 生成测试数据
  cout << "Generating random data..." << endl;
  auto data = generate_random_data(data_size);

  // 单线程测试
  cout << "\nSingle-threaded test:" << endl;
  unordered_map<int, size_t> st_map;
  st_map.reserve(data_size);

  auto start = high_resolution_clock::now();
  single_thread_insert(data, st_map);
  auto end = high_resolution_clock::now();

  auto st_duration = duration_cast<milliseconds>(end - start);
  cout << "Time: " << st_duration.count() << " ms" << endl;

  cout << "\nMulti-threaded test (" << num_threads << " threads):" << endl;
  unordered_map<int, size_t> mt_map;
  mt_map.reserve(data_size);

  start = high_resolution_clock::now();
  multi_thread_insert(data, mt_map, num_threads);
  end = high_resolution_clock::now();

  auto mt_duration = duration_cast<milliseconds>(end - start);
  cout << "Time: " << mt_duration.count() << " ms" << endl;

  return 0;
}

就是多线程插入的比单线程的还慢。。。

muyuuuu avatar May 15 '25 18:05 muyuuuu