我编写了一个简单的库,用于在C++中读取RIFF编码的文件。
我尽量遵循现代C++实践,并提供了一个符合惯用法的API,但有一处在我看来格外突兀:获取“块”内容的方法是调用data()方法,该方法返回一个std::vector<char>。其他大多数访问数据的方法都不需要把内容从流中完整读出,这是唯一的例外。
我想知道是否有更好的方法来做到这一点。
riffcpp.hpp:
#ifndef RIFFCPP_H
#define RIFFCPP_H
#include <array>
#include <cstddef>
#include <cstdint>
#include <istream>
#include <iterator>
#include <vector>
namespace riffcpp {
/**
  A FourCC: the sequence of four bytes that identifies the various types of
  RIFF chunks
*/
using FourCC = std::array<char, 4>;

/// FourCC `RIFF`, the identifier of toplevel chunks
constexpr FourCC riff_id{{'R', 'I', 'F', 'F'}};

/// FourCC `LIST`, the identifier of chunks that contain other chunks
constexpr FourCC list_id{{'L', 'I', 'S', 'T'}};

// Declared ahead of Chunk, whose begin() and end() return it by value
class ChunkIt;
/**
  A single chunk of a RIFF stream

  A chunk is made of a four byte identifier (FourCC) followed by some
  contents. For some identifiers the contents are themselves a sequence of
  chunks; such a chunk carries a second FourCC that tells its type.
*/
class Chunk {
private:
  std::istream &m_stream; ///< Stream the chunk is read from
  std::streampos m_pos;   ///< Position of the chunk in the stream

public:
  /**
    Binds a chunk to the given position of a stream

    Nothing is loaded on construction: each accessor below reads what it
    needs from the stream when it is called.

    The stream needs to be able to seek to arbitrary positions.
  */
  Chunk(std::istream &stream, std::streampos pos);

  /// Identifier of the chunk
  FourCC id();

  /// Type FourCC of the chunk, for chunks that contain other chunks
  FourCC type();

  /// Size in bytes of the chunk's contents
  std::uint32_t size();

  /**
    Iterator to the first subchunk, for chunks that contain other chunks

    Pass `no_chunk_id = true` for chunks which have no chunk id but still
    contain subchunks, like `seqt` from DirectMusic
  */
  ChunkIt begin(bool no_chunk_id = false);

  /**
    Iterator pointing past the last subchunk, for chunks that contain other
    chunks
  */
  ChunkIt end();

  /**
    Raw contents of the chunk, read in full from the stream
  */
  std::vector<char> data();
};
/**
  Iterator that walks over the subchunks of a chunk
*/
class ChunkIt {
private:
  std::streampos m_pos;   ///< Where the pointed chunk starts in the stream
  std::istream &m_stream; ///< Stream the chunks are read from

public:
  /// Builds an iterator pointing at the given stream position
  ChunkIt(std::istream &stream, std::streampos pos);

  /// True when both iterators point to the same chunk
  bool operator==(const ChunkIt &a) const;

  /// True when the iterators do not point to the same chunk
  bool operator!=(const ChunkIt &a) const;

  /// The chunk currently pointed by the iterator
  Chunk operator*() const;

  /// Pre-increment: moves the iterator ahead, to the following chunk
  ChunkIt &operator++();

  /**
    Post-increment: moves the iterator ahead, to the following chunk, and
    returns an iterator to the position held before moving
  */
  ChunkIt operator++(int);
};
} // namespace riffcpp
namespace std {
/**
  Iterator traits of riffcpp::ChunkIt

  Provides all five member types generic code may query through
  std::iterator_traits; a specialization that omits some of them makes any
  algorithm asking for the missing ones fail to compile.
*/
template <> struct iterator_traits<riffcpp::ChunkIt> {
  using difference_type = std::ptrdiff_t;
  using value_type = riffcpp::Chunk;
  using pointer = riffcpp::Chunk *;
  /// operator* returns the chunk by value, so there is no true reference
  using reference = riffcpp::Chunk;
  using iterator_category = std::input_iterator_tag;
};
} // namespace std
#endif // RIFFCPP_H
riffcpp.cpp:
#include <riffcpp.hpp>
riffcpp::Chunk::Chunk(std::istream &stream, std::streampos pos)
: m_stream(stream), m_pos(pos) {}
/**
  Reads the chunk's identifier: the four bytes found at the chunk's position.

  @return the identifier; bytes that could not be read are left as zero
*/
riffcpp::FourCC riffcpp::Chunk::id() {
  // Value-initialized so that a failed or short read never hands back
  // indeterminate bytes
  riffcpp::FourCC read_id{};
  m_stream.seekg(m_pos);
  m_stream.read(read_id.data(), static_cast<std::streamsize>(read_id.size()));
  return read_id;
}
/**
  Reads the size of the chunk's contents: the four bytes that follow the
  chunk's identifier.

  RIFF stores this field in little-endian byte order, so the value is
  assembled byte by byte instead of being copied into an integer, which would
  only give the right result on little-endian hosts.

  @return the size in bytes; 0 if the field could not be read
*/
std::uint32_t riffcpp::Chunk::size() {
  const std::streamoff size_field_offset{4};
  m_stream.seekg(m_pos + size_field_offset);

  // Zero-filled so that a failed read yields 0 rather than garbage
  char raw[4] = {};
  m_stream.read(raw, sizeof raw);

  // Go through unsigned char first: plain char may be signed and would
  // sign-extend bytes above 0x7f
  std::uint32_t value = 0;
  for (int i = 3; i >= 0; --i) {
    value = (value << 8) | static_cast<unsigned char>(raw[i]);
  }
  return value;
}
/**
  Reads the chunk's type: the four bytes found 8 bytes past the chunk's
  position, right after the identifier and the size field.

  @return the type FourCC; bytes that could not be read are left as zero
*/
riffcpp::FourCC riffcpp::Chunk::type() {
  // Value-initialized so that a failed or short read never hands back
  // indeterminate bytes
  riffcpp::FourCC read_type{};
  const std::streamoff type_field_offset{8};
  m_stream.seekg(m_pos + type_field_offset);
  m_stream.read(read_type.data(),
                static_cast<std::streamsize>(read_type.size()));
  return read_type;
}
/**
  Reads the whole contents of the chunk into memory.

  @return a vector of size() bytes holding the contents; bytes that could not
          be read are left as zero
*/
std::vector<char> riffcpp::Chunk::data() {
  // Ask for the size first: size() moves the stream's read position, so the
  // seek to the contents must come after it rather than before
  const std::uint32_t data_size = size();

  // The contents start after the identifier (4 bytes) and the size (4 bytes)
  const std::streamoff contents_offset{8};
  m_stream.seekg(m_pos + contents_offset);

  std::vector<char> read_data(data_size);
  m_stream.read(read_data.data(), static_cast<std::streamsize>(data_size));
  return read_data;
}
/**
  Builds the iterator to the first subchunk.

  Subchunks start 12 bytes into the chunk, or 8 bytes into it when
  `no_chunk_id` is set.
*/
riffcpp::ChunkIt riffcpp::Chunk::begin(bool no_chunk_id) {
  std::streamoff first_subchunk{12};
  if (no_chunk_id) {
    first_subchunk = 8;
  }
  const std::streampos start = m_pos + first_subchunk;
  return riffcpp::ChunkIt(m_stream, start);
}
/**
  Builds the iterator pointing past the last subchunk, i.e. at the first byte
  after this chunk: 8 bytes of header, the contents, and one extra byte when
  the contents have an odd size.
*/
riffcpp::ChunkIt riffcpp::Chunk::end() {
  // Widen before adding: done in 32-bit unsigned arithmetic the sum wraps
  // around for sizes close to 2^32
  const auto contents_size = static_cast<std::streamoff>(size());
  const std::streamoff header_size = 8;
  const std::streamoff padding = contents_size % 2;
  const std::streamoff offs = header_size + contents_size + padding;
  return riffcpp::ChunkIt(m_stream, m_pos + offs);
}
/// Stores the position and the stream; the stream is not touched here
// Initializers are listed in the order the members are declared in ChunkIt
// (m_pos, then m_stream), which is the order they are initialized in anyway
riffcpp::ChunkIt::ChunkIt(std::istream &stream, std::streampos pos)
    : m_pos(pos), m_stream(stream) {}
/**
  Two iterators are equal when they point to the same position of the same
  stream object; equal positions in different streams are different chunks.
*/
bool riffcpp::ChunkIt::operator==(const ChunkIt &a) const {
  return &m_stream == &a.m_stream && m_pos == a.m_pos;
}
/// Exact negation of operator==
bool riffcpp::ChunkIt::operator!=(const ChunkIt &a) const {
  const bool same_chunk = operator==(a);
  return !same_chunk;
}
/// Builds a Chunk bound to the iterator's stream and current position
riffcpp::Chunk riffcpp::ChunkIt::operator*() const {
  riffcpp::Chunk pointed(m_stream, m_pos);
  return pointed;
}
/**
  Moves the iterator to the chunk that follows the current one.

  The current chunk's size is read from the stream; the iterator then skips
  the 8 byte header, the contents, and one extra byte when the contents have
  an odd size.
*/
riffcpp::ChunkIt &riffcpp::ChunkIt::operator++() {
  riffcpp::Chunk chunk(m_stream, m_pos);

  // Widen before adding: done in 32-bit unsigned arithmetic the sum wraps
  // around for sizes close to 2^32, and a wrapped offset of 0 would leave the
  // iterator stuck on the same chunk forever
  const auto contents_size = static_cast<std::streamoff>(chunk.size());
  const std::streamoff header_size = 8;
  const std::streamoff padding = contents_size % 2;

  m_pos += header_size + contents_size + padding;
  return *this;
}
/**
  Moves the iterator to the chunk that follows the current one and returns
  an iterator to the position held before moving.

  The advance itself is delegated to the pre-increment operator, so the two
  operators cannot drift apart.
*/
riffcpp::ChunkIt riffcpp::ChunkIt::operator++(int) {
  riffcpp::ChunkIt previous(m_stream, m_pos);
  ++(*this);
  return previous;
}
示例用法:
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <riffcpp.hpp>
/// Writes one space per indentation level to standard output; writes nothing
/// when `indent` is zero or negative
void print_indent(int indent) {
  int remaining = indent;
  while (remaining > 0) {
    std::cout << " ";
    --remaining;
  }
}
/**
  Prints `data` to standard output as a hex dump, 16 bytes per row.

  Every row starts with `indent` levels of indentation and shows each byte as
  two hex digits followed by a space. An incomplete last row is padded with
  `-- ` up to 16 columns. The dump always ends with a newline.

  The data is only read, so it is taken by const reference. The formatting
  flags and fill character of std::cout are put back as they were on entry.

  @param data   bytes to dump
  @param indent indentation level of each row
*/
void print_hex_dump(const std::vector<char> &data, int indent) {
  const int bytes_per_row = 16;

  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  const char saved_fill = std::cout.fill('0');
  std::cout << std::hex;

  int column = 0; // position inside the current row, 0..15
  for (const char byte : data) {
    if (column == 0) {
      print_indent(indent);
    }
    // Through unsigned char first, so bytes above 0x7f do not sign-extend
    std::cout << std::setw(2)
              << static_cast<int>(static_cast<unsigned char>(byte)) << ' ';
    ++column;
    if (column == bytes_per_row) {
      std::cout << '\n';
      column = 0;
    }
  }

  // Pad an incomplete last row up to the full width
  if (column != 0) {
    for (; column < bytes_per_row; ++column) {
      std::cout << "-- ";
    }
  }

  std::cout.flags(saved_flags);
  std::cout.fill(saved_fill);
  std::cout << '\n';
}
/**
  Prints a chunk to standard output, indented by `offs` levels.

  `RIFF` and `LIST` chunks are printed as a line with their id, type and
  size, followed by each of their subchunks one level deeper. Any other chunk
  is printed as a line with its id and size, followed by a hex dump of its
  contents one level deeper.
*/
void print_chunks(riffcpp::Chunk &ch, int offs = 0) {
  const auto chunk_id = ch.id();     // Reads the chunk's id
  const auto chunk_size = ch.size(); // Reads the chunk's size
  const std::string id_text(chunk_id.data(), 4);

  const bool has_subchunks =
      chunk_id == riffcpp::riff_id || chunk_id == riffcpp::list_id;

  if (!has_subchunks) {
    // Not a known container: show the raw contents as a hex dump
    auto contents = ch.data();
    print_indent(offs);
    std::cout << id_text << " size: " << chunk_size << "\n";
    print_hex_dump(contents, offs + 1);
    return;
  }

  // A 'RIFF' or a 'LIST': header line first, then the subchunks
  print_indent(offs);
  const auto chunk_type = ch.type(); // Reads the chunk's type
  const std::string type_text(chunk_type.data(), 4);
  std::cout << id_text << " " << type_text << " size: " << chunk_size << "\n";

  for (auto subchunk : ch) {
    print_chunks(subchunk, offs + 1);
  }
}
/**
  Dumps the chunk structure of the RIFF file named by the first command line
  argument.

  @return 0 on success, 1 when no file name was given or the file could not
          be opened
*/
int main(int argc, char *argv[]) {
  // argv[1] is only valid when an argument was actually passed
  if (argc < 2) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "<program>")
              << " <riff-file>\n";
    return 1;
  }

  std::ifstream stream(argv[1], std::ios::binary);
  if (!stream) {
    std::cerr << "cannot open '" << argv[1] << "'\n";
    return 1;
  }

  // Read the chunk from the current position
  riffcpp::Chunk ch(stream, stream.tellg());
  print_chunks(ch);
  return 0;
}
链接到GitHub回购:https://github.com/frabert/riffcpp
发布于 2019-04-03 15:03:53
这里有个很严重的问题:
uint32_t read_size;
m_stream.read(reinterpret_cast<char *>(&read_size), 4);
当编译目标平台的字节序(endianness)与文件的字节序一致时,可以想见这段代码能正常工作。然而,当两者的字节序相反时,得到的结果将与预期大相径庭。
与其特化std::iterator_traits,通常更简单的做法是直接提供合适的成员类型,让未特化的主模板去完成它的工作:
class ChunkIt {
using value_type = riffcpp::Chunk;
using reference = value_type&;
using pointer = value_type*;
using difference_type = std::ptrdiff_t;
using iterator_category = std::input_iterator_tag;
...
};
按照惯例,迭代器应作为内部类型Chunk::iterator提供。同时提供const_iterator类型可能也是个好主意。
可以减少迭代器中的代码重复。例如,后置自增应当借助前置自增和一个临时对象来实现。
https://codereview.stackexchange.com/questions/216748
复制相似问题