首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >netlink序列号无序

netlink序列号无序
EN

Stack Overflow用户
提问于 2018-07-17 06:02:40
回答 1查看 448关注 0票数 1

我正在使用 netlink 和 genetlink 这两个 Go 包编写一个与 MAC80211_HWSIM netlink 族(family)交互的实用程序。它工作得很好——几乎如此。

我发现,如果我执行GET_RADIO调用,那么家族中的下一个调用将无法验证,因为响应的序列号与请求的序列号不匹配。具体地说,我看到的模式是

代码语言:javascript
复制
GET_RADIO request:  seq=655
GET_RADIO response: seq=655
DEL_RADIO request:  seq=656
DEL_RADIO response: seq=655

GET_RADIO之后发出哪个命令并不重要,同样的模式也会出现。如果GET_RADIO不在其中,则序列号与预期完全相同。

现有的一个实用程序是用C编写的,使用libnl和libgenl,我对其进行了修改以运行多个命令。据我所知,该实现做了非常类似的事情,但序列号没有问题,所以我不倾向于认为问题出在内核模块上。但是strace清楚地表明内核返回的响应序列号是乱序的(请原谅需要大量横向滚动):

代码语言:javascript
复制
socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) = 3
bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, nl_pid=18357, nl_groups=00000000}, [112->12]) = 0
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=40, type=0x10 /* NLMSG_??? */, flags=NLM_F_REQUEST, seq=2596996163, pid=18357}, "\3\1\0\0\23\0\2\0MAC80211_HWSIM\0\0"}, iov_len=40}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 40
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=224, type=0x10 /* NLMSG_??? */, flags=0, seq=2596996163, pid=18357}, "\1\2\0\0\23\0\2\0MAC80211_HWSIM\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_PEEK) = 224
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=224, type=0x10 /* NLMSG_??? */, flags=0, seq=2596996163, pid=18357}, "\1\2\0\0\23\0\2\0MAC80211_HWSIM\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 224
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=2596996164, pid=18357}, "\6\1\0\0\10\0\n\0T\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* NLMSG_??? */, flags=0, seq=2596996164, pid=18357}, "\6\1\0\0\10\0\n\0T\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy84\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_PEEK) = 48
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* NLMSG_??? */, flags=0, seq=2596996164, pid=18357}, "\6\1\0\0\10\0\n\0T\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy84\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=2596996165, pid=18357}, "\5\1\0\0\10\0\n\0T\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=2596996164, pid=18357}, "\0\0\0\0\34\0\0\0\31\0\5\0D\4\313\232\265G\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_PEEK) = 36
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=2596996164, pid=18357}, "\0\0\0\0\34\0\0\0\31\0\5\0D\4\313\232\265G\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
close(3)                                = 0
+++ exited with 0 +++

请注意,每个sendmsg()都对应两个recvmsg()调用,因为第一个调用只是执行MSG_PEEK以查看是否有可用的应答。在三组请求和响应中,第一组(序列号...163)只是获取族(family)信息。第二组(序列号...164)执行GET_RADIO (\6)调用,第三组则出现了乱序响应:这个DEL_RADIO (\5)调用发送的消息序列号为...165,但返回的响应(其他方面格式良好)所带的序列号却与前一个命令相同:...164。

因为这是Go,所以一次有多个线程处于活动状态,但是上面strace输出中的所有调用都来自同一个线程。跟踪中唯一遗漏的调用是对futex()的调用。

因为C版本使用了libnl和libgenl提供的回调机制,所以它在不同于请求来源的线程中处理响应。请求线程的跟踪如下:

代码语言:javascript
复制
socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_GENERIC) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [32768], 4) = 0
getpid()                                = 21338
bind(3, {sa_family=AF_NETLINK, nl_pid=-1300212902, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, nl_pid=-1300212902, nl_groups=00000000}, [12]) = 0
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=20, type=0x10 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK|0x300, seq=1530817156, pid=2994754394}, "\3\1\0\0"}, iov_len=20}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=116, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\v\0\2\0nlctrl\0\0\6\0\1\0\20\0\0\0\10\0\3\0\2\0\0\0"...}, {{len=96, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\16\0\2\0VFS_DQUOT\0\0\0\6\0\1\0\21\0\0\0\10\0\3\0"...}, {{len=104, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\17\0\2\0acpi_event\0\0\6\0\1\0\23\0\0\0\10\0\3\0"...}, {{len=108, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\22\0\2\0thermal_event\0\0\0\6\0\1\0\24\0\0\0"...}, {{len=112, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\20\0\2\0tcp_metrics\0\6\0\1\0\25\0\0\0\10\0\3\0"...}, {{len=112, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\16\0\2\0TASKSTATS\0\0\0\6\0\1\0\26\0\0\0\10\0\3\0"...}, {{len=2076, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\f\0\2\0nl80211\0\6\0\1\0\30\0\0\0\10\0\3\0\1\0\0\0"...}, {{len=224, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\23\0\2\0MAC80211_HWSIM\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 2948
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=20, type=NLMSG_DONE, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\0\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=65542}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fdb752ce000
mprotect(0x7fdb752ce000, 4096, PROT_NONE) = 0
clone(child_stack=0x7fdb75acdff0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7fdb75ace9d0, tls=0x7fdb75ace700, child_tidptr=0x7fdb75ace9d0) = 21339
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1530817157, pid=2994754394}, "\6\1\0\0\10\0\n\0_\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
nanosleep({tv_sec=2, tv_nsec=0}, 0x7ffff64ac200) = 0
write(1, "Deleting radio with id '95'...\n", 31) = 31
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1530817158, pid=2994754394}, "\5\1\0\0\10\0\n\0_\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
nanosleep({tv_sec=2, tv_nsec=0},  <unfinished ...>) = ?
+++ exited with 0 +++

和响应跟踪:

代码语言:javascript
复制
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* NLMSG_??? */, flags=0, seq=1530817157, pid=2994754394}, "\6\1\0\0\10\0\n\0_\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy95\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48
epoll_wait(4, [{EPOLLIN, {u32=3, u64=3}}], 32, -1) = 1
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=1530817157, pid=2994754394}, "\0\0\0\0\34\0\0\0\31\0\5\0\205j>[ZS\200\262"}, {{len=1114121, type=0x6870 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ECHO|NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED|0x3940, seq=53, pid=0}, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
write(1, "new SSID defined to interface 95"..., 33) = 33
epoll_wait(4, [{EPOLLIN, {u32=3, u64=3}}], 32, -1) = 1
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=1530817158, pid=2994754394}, "\0\0\0\0\34\0\0\0\31\0\5\0\206j>[ZS\200\262"}, {{len=1114121, type=0x6870 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ECHO|NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED|0x3940, seq=53, pid=0}, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
write(1, "Successfully deleted device with"..., 39) = 39
exit_group(0)                                       = ?
+++ exited with 0 +++

在我看来,唯一可能相关的区别(除了线程结构)是它使用了不同的标志(C代码中的NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED,Go代码中的0)。

我不确定我还能用什么工具来解决这个问题,因为我不知道这些序列号是如何构造的。在我看来,内核模块、Go包或我自己的Go代码中都没有明显的错误,但接口的预期使用方式和实际使用方式之间总是可能存在不匹配。有人熟悉这个问题吗?或者有人对如何在这个问题上取得进展有什么想法?

EN

回答 1

Stack Overflow用户

发布于 2018-07-18 02:58:04

我仍然不明白为什么会发生这种情况,但简而言之(TL;DR),解决方案似乎是读取下一个数据包:它包含相同的数据,并带有正确的序列号。

我尝试用不同的netlink package编写程序,发现虽然它有相同的问题,但遇到这种情况时,它会重复recvmsg()调用。执行此操作的代码如下:

https://github.com/vishvananda/netlink/blob/a06dabf/nl/nl_linux.go#L425

引入该代码的commit没有解释为什么这个循环是正确的,但是在另一个包中模拟它可以解决这个问题。

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/51370678

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档