问sha1sum在C++中的实现
EN

Code Review用户

提问于 2015-01-26 22:16:44

回答 1查看 283关注 0票数 8

我很少愤怒地编写C++程序，我认为自己有点生疏，当然也无法在C++11上跟上速度。为了做一些练习，我想我应该尝试实现sha1sum，因为在密码散列函数之类的主题上，这也不是真正的速度。

(是的，是的，你不应该在新代码中使用SHA-1，明白！)

这会成为现代习语C++吗？

#include <vector>
#include <cinttypes>
#include <iostream>
#include <ostream>
#include <istream>
#include <ios>
#include <numeric>
#include <iomanip>
#include <functional>
#include <fstream>
#include <algorithm>


namespace {
const auto blockBits = 512;

const auto int32Size = 4;
const auto int32Bits = int32Size * 8;
const auto intsPerBlock = blockBits / int32Bits;

const auto resultBits = 160;
const auto intsInResult = resultBits / int32Bits;

typedef std::vector<uint32_t> BlockVector;
typedef std::vector<uint32_t> HashVector;


uint32_t rotl(const uint32_t x, const unsigned int& n) {
  const unsigned int rshift = int32Bits - n;
  return (x << n) | (x >> rshift);
}


uint32_t accumulateUint(const uint32_t& left, unsigned char& right) {
  return (left << 8) | right;
}


typedef std::function<uint32_t(const uint32_t&, const uint32_t&, const uint32_t&)> RoundFunction;


RoundFunction& getFunctionForRound(const unsigned int round) {
  static RoundFunction roundFunctions[] = {
    // Ch: rounds 0 - 19
    [] (const uint32_t& x, const uint32_t& y, const uint32_t& z) -> uint32_t {
      return (x & y) ^ (~x & z);
    },

    // Parity: 20-39, 60 - 79
    [] (const uint32_t& x, const uint32_t& y, const uint32_t& z) -> uint32_t {
      return x ^ y ^ z;
    },

    // Maj: 40 - 59
    [] (const uint32_t& x, const uint32_t& y, const uint32_t& z) -> uint32_t {
      return (x & y) ^ (x & z) ^ (y & z);
    }
  };

  return roundFunctions[(round > 59 ? round - 40 : round) / 20];
}


const uint32_t& getConstantForRound(const unsigned int round) {
  static uint32_t constants[] = {
    0x5a827999, // 0 - 29
    0x6ed9eba1, // 20 - 39
    0x8f1bbcdc, // 40 - 59
    0xca62c1d6, // 60 - 79
  };

  return constants[round / 20];
}


struct RoundVariables {
  HashVector workingVars;
  BlockVector W;

  RoundVariables(HashVector& hash, BlockVector&& w) : workingVars(HashVector(hash)), W(w) {}
  HashVector&& getHash() { return std::move(workingVars); }
};


void hashRound(RoundVariables& roundVars, const unsigned int& round) {
  enum {a, b, c, d, e};
  HashVector& vars = roundVars.workingVars;

  uint32_t T = 0u;

  T = rotl(vars[a], 5) + getFunctionForRound(round)(vars[b], vars[c], vars[d]) +
           vars[e] + getConstantForRound(round) + roundVars.W[round];

  std::rotate(vars.rbegin(), vars.rbegin() + 1, vars.rend());
  vars[c] = rotl(vars[c], 30);
  vars[a] = T;
}


BlockVector computeHash(HashVector&& previousHash, const BlockVector::const_iterator& blockStart, const BlockVector::const_iterator& blockEnd) {
  BlockVector w;

  std::copy(blockStart, blockEnd, std::back_inserter(w));

  for (auto i = 16; i < 80; i++) {
    w.emplace_back(rotl(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1));
  }

  RoundVariables roundVars(previousHash, std::move(w));
  for (auto i = 0; i < 80; i++) {
    hashRound(roundVars, i);
  }

  HashVector newHash = roundVars.getHash();
  std::transform(newHash.begin(), newHash.end(), previousHash.begin(), newHash.begin(),
                 std::plus<uint32_t>());

  return newHash;
}


HashVector sha1(const BlockVector& input) {
  HashVector hashes = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0};

  for (auto it = input.begin(); it < input.end(); it += intsPerBlock) {
    hashes = computeHash(std::move(hashes), it, it + intsPerBlock);
  }

  return hashes;
}


typedef std::pair<BlockVector, uint64_t> Input;


BlockVector PadInput(Input&& input) {
  BlockVector padded(std::get<0>(input));
  uint64_t byteCount = std::get<1>(input);

  // Add the end-of-message marker
  if ((byteCount % int32Size) != 0) {
    unsigned char marker = 0x80u;

    uint32_t last = padded.back();
    last = accumulateUint(last, marker);
    last = last << ((int32Size - (byteCount % int32Size) - 1) * 8);

    padded.back() = last;
  } else {
    padded.emplace_back(0x80000000);
  }

  unsigned int paddingRequired = intsPerBlock - ((padded.size() + 2) % intsPerBlock);
  padded.insert(padded.end(), paddingRequired, '\0');

  // Add the number of bits in the original message
  byteCount *= 8;
  uint32_t messageLenLSB = static_cast<uint32_t>(byteCount & 0xFFFFFFFF);
  uint32_t messageLenMSB = static_cast<uint32_t>((byteCount >> (int32Size * 8)) & 0xFFFFFFFF);

  // The standard assumes big-endian
  padded.emplace_back(messageLenMSB);
  padded.emplace_back(messageLenLSB);

  return padded;
}


Input GetInput(std::istream& in) {
  int c;
  uint64_t count = 0u;
  uint32_t next = 0u;

  BlockVector v;
  v.reserve(intsPerBlock);

  while ((c = in.get()) != EOF) {
    unsigned char charInput = static_cast<unsigned char>(c);
    next = accumulateUint(next, charInput);

    if ((++count % int32Size) == 0) {
      v.emplace_back(next);
      next = 0;
    }
  }

  if (count > 0) {
    v.emplace_back(next);
  }

  return std::make_pair(std::move(v), count);
}


void hashSource(std::istream& in, const char* source) {
  HashVector hashed = sha1(PadInput(move(GetInput(in))));

  std::cout << std::hex << std::setfill('0') << std::setw(int32Size);
  for (auto i : hashed) {
    std::cout << i;
  }

  std::cout << "  " << source << std::endl;
}


} // namespace


int main(int argc, char** argv) {
  if (argc == 1) {
    hashSource(std::cin, "-");
    return 0;
  }

  int exitCode = 0;

  for (auto i = 1; i < argc; i++) {
    std::ifstream in(argv[i]);

    if (in.fail()) {
      std::cerr << argv[0] << ": " << argv[i] << ": no such file or directory" << std::endl;
      in.close();
      exitCode = 1;
      continue;
    }

    hashSource(in, argv[i]);
    in.close();
  }

  return exitCode;
}

c++

c++11

cryptography

回答 1

Code Review用户

回答已采纳

发布于 2015-01-26 22:36:44

在不进行全面审查的情况下，以下是您可以做的一些改进代码的工作：

通常情况下，从标准库中显式地包含所需的标头是一个很好的实践，但是<iostream>保证包括<istream>、<ostream>和<ios>；而且它很容易记住。因此，你可以摆脱明确的#includes的<istream>，<ostream>和<ios>。
与将所有内容放在一个文件中并将除main之外的所有东西都放在一个匿名namespace中不同，您可以用可重用的代码片段(已经具有可重用的函数，这很好)来破坏代码，并创建一个头来放置它们的原型。
const auto int32Size = 4;和朋友可能会更安全。如果您知道只有在32位整数存在时才能工作，那么我会这样写它: constexpr std::int32_t in32Size =std：：int32_t；虽然使用auto通常是一种很好的实践，特别是在编写泛型代码时，您实际上是在为一个非常特定的类型编写代码，即std::int32_t (即使在名称中，使用其他任何东西都可能会产生误导)。因此，我将通过使用std::int32_t并让sizeof推断其大小来记录这一点。这样，你就永远不会错。
const auto int32Bits = int32Size * 8;也可以用以下内容替换: std::int32_t int32Bits = std::numeric_limits::digits；如果您希望获得内置整数和浮点类型的类型安全和实现安全信息(如果类型重载，则更多地使用std::numeric_limits )。
请注意，在前面的两个点中，我都使用了constexpr。这是一种C++11标准方法，要求编译器在编译时计算值，以便在需要编译时常量表达式的更多上下文中使用。
以下两行可以从少量的C++11中受益:ty胡枝子std::vector BlockVector；ty胡枝子std::vector HashVector；使用新的别名模板，它们可以重写为:使用BlockVector = std::vector；使用HashVector = std::vector；虽然旧版本和新版本在语义上是相同的，但是我发现新的别名模板语法更接近变量命名空间分配和赋值语法，这有助于不必记住与函数指针变得尴尬的typedef语法。更重要的是，这个使用声明可以模板化，不像typedef。
通常认为，通过值将内置类型和小类型(小于两个指针的大小)传递给函数，而不是通过const&传递它们，这是一个很好的实践。其基本原理是，如果编译器不需要考虑引用引入的可能的混叠问题，则可以更容易地优化寄存器中的类型。
这一行感觉不安全: HashVector&& getHash() {返回std::move(workingVars)；}一般说来，只有在当前实例保证是临时的情况下，才应该通过rvalue返回引用。为了做到这一点，您可以对函数本身进行rvlue限定: HashVector&& getHash() && { return：：move(WorkingVars)；}，但是对于lvalue-引用，它可能还需要另一个重载。实际上，还引入了rvalue-引用，以便编写简单的代码。也就是说，您的函数应该是: HashVector getHash() {返回workingVars；}编译器将完成查找临时内容的工作，并在适当时移动。

票数 4

页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://codereview.stackexchange.com/questions/78690

复制

相似问题

问sha1sum在C++中的实现
EN

回答 1

Code Review用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问sha1sum在C++中的实现EN

回答 1

Code Review用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问sha1sum在C++中的实现
EN