我写了一些脚本来显示我各个 git 仓库的提交统计和合并统计,它们都能正常工作。写这些脚本既是为了自己使用,也是因为我想从自己的 git 仓库中找出一些趋势。
这个脚本报告有关提交的统计信息(提交数量、提交消息以单词计的平均长度等)。可以通过 git-rev-list 的选项来筛选相关的提交。
功能与耗时
- count:报告提交的数量(不存在性能问题)
- len:报告每条提交消息的长度(以单词计)以及对应的提交哈希(共 1561 次提交)
- len min、len max 和 len avg:报告提交消息长度(以单词计)的最小值、最大值或平均值以及对应的提交哈希(同一仓库约需 10–15 秒)
基准测试是用 bash 的 time 在我的 dotfiles 仓库上运行的。
此前使用 for 循环的实现,性能与此相近。
显然,这些算法都是 O(n) 的。但对于日常使用来说,它们还是太慢了。
#! /usr/bin/env bash
# Reports commit statistics (commit count, commit message length in words).
# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -euo pipefail
# USAGE and SUBDIRECTORY_OK are not read anywhere in this script itself;
# presumably they are consumed by git-sh-setup, which is sourced below
# (see git-sh-setup(1) to confirm their exact meaning).
USAGE='[-h] (count | len [min|max|avg]) [rev-list options]
Display commit statistics
Filter commits based on [rev-list options]'
SUBDIRECTORY_OK=true
# source git-sh-setup for some helpers
# nounset is switched off only while sourcing and restored right afterwards,
# presumably because git-sh-setup expands variables that may be unset.
set +u
source "$(git --exec-path)"/git-sh-setup
set -u
# Command (as an argv array) that measures a commit message read from stdin.
# `wc -w` counts words.
SIZER=(wc -w)
# Print the size of one commit's message, as measured by the SIZER command.
# Globals:   SIZER (read) - argv array of the measuring command
# Arguments: $1 - commit to inspect
# Outputs:   the measurement on stdout, with all spaces removed
size() {
  local rev=$1
  # Raw commit message (%B) of exactly one commit.
  local -a show_message=(git log "$rev" -1 --format=%B)
  "${show_message[@]}" | "${SIZER[@]}" | tr -d ' '
}
# List the hashes of the selected commits, one per line.
# Arguments: optional extra options, appended verbatim to `git rev-list --all`
# Outputs:   whatever `git rev-list` prints on stdout
# Returns:   the exit status of `git rev-list`
commits_list() {
  # Keep the argv array local: the previous global was named `command`,
  # which leaked into the caller's scope under the name of a shell builtin.
  local -a rev_list_cmd=(
    git
    rev-list
    # start somewhere
    --all
    # caller-supplied options; expands to nothing when there are none
    "$@"
  )
  # NOTE(review): stderr is discarded, so invalid rev-list options fail
  # silently apart from the exit status - confirm this is intended.
  "${rev_list_cmd[@]}" 2>/dev/null
}
# Print how many commits commits_list selects.
# Arguments: forwarded unchanged to commits_list
# Outputs:   the number of lines commits_list printed, with spaces removed
commit_count() {
  commits_list "$@" \
    | wc -l \
    | tr -d ' '
}
# Print "<size> <commit>" for every commit selected by commits_list.
# Arguments: forwarded unchanged to commits_list
# Outputs:   one line per commit on stdout
commit_len() {
  local sha
  commits_list "$@" |
    # -r keeps backslashes literal, so each line is passed on exactly as listed.
    while read -r sha; do
      # size ends its output with a newline; strip all whitespace so the
      # commit can follow on the same line.
      size "$sha" | tr -d '[:space:]'
      printf ' %s\n' "$sha"
    done
}
# Print the commit_len line with the smallest size.
# Arguments: forwarded unchanged to commit_len
# Outputs:   the first line of the numerically sorted commit_len output
commit_len_min() {
  commit_len "$@" |
    sort -n |
    # Take the first line with a filter that reads its whole input.
    # `head -n 1` exits early, which kills `sort` with SIGPIPE on large
    # outputs and makes the pipeline fail under `set -o pipefail`.
    awk 'NR == 1'
}
# Print the commit_len line with the largest size.
# Arguments: forwarded unchanged to commit_len
# Outputs:   the first line of the reverse-numerically sorted commit_len output
commit_len_max() {
  commit_len "$@" |
    sort -rn |
    # Take the first line with a filter that reads its whole input.
    # `head -n 1` exits early, which kills `sort` with SIGPIPE on large
    # outputs and makes the pipeline fail under `set -o pipefail`.
    awk 'NR == 1'
}
# Print the average size of the selected commits.
# The function writes a small dc program (precision 5, sum of all sizes
# divided by the number of commits, then print) and pipes it into dc.
# Arguments: forwarded unchanged to commits_list
# Outputs:   whatever dc prints for the generated program
commit_len_avg() {
  local num=0
  local sha
  {
    # 5 digits after the decimal point
    printf '%s\n' '5k'
    while read -r sha; do
      num=$((num + 1))
      size "$sha"
      # From the second size on, add it to the running sum on dc's stack.
      if ((num >= 2)); then
        printf '%s\n' '+'
      fi
    done < <(commits_list "$@")
    if ((num == 0)); then
      # No commits: there is no sum to divide, so just print 0 instead of
      # asking dc to divide with a single value on the stack.
      printf '%s\n' '0' 'p'
    else
      printf '%s\n' "$num" '/p'
    fi
  } | dc
}
# Entry point: dispatch on the sub-command.
#   count [rev-list options]               -> commit_count
#   len [min|max|avg] [rev-list options]   -> commit_len / commit_len_<stat>
# Anything else (including no arguments at all) calls usage.
main() {
  if (($# < 1)); then
    usage
  fi
  local subcommand=$1
  shift
  case "$subcommand" in
    count)
      commit_count "$@"
      ;;
    len)
      # An optional statistic selector may follow; every other word is
      # passed through as a rev-list option.
      case "${1-}" in
        max | min | avg) commit_len_"$1" "${@:2}" ;;
        *) commit_len "$@" ;;
      esac
      ;;
    *)
      usage
      ;;
  esac
}
main "$@"
Shell 脚本很难做性能分析(profile),我一直没能找出瓶颈所在(尽管 commit_len 看起来是个不错的切入点)。
我定期运行shellcheck。
发布于 2019-08-14 20:28:17
通过把 awk 与一种巧妙的输出格式结合起来,我大幅提升了性能:现在所有处理都交给 awk 完成,即使面对我那 1600 次提交,脚本的耗时也不到 0.3 秒。
#! /usr/bin/env bash
# Reports commit statistics (commit count, commit message length in words).
# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -euo pipefail
# USAGE and SUBDIRECTORY_OK are not read anywhere in this script itself;
# presumably they are consumed by git-sh-setup, which is sourced below
# (see git-sh-setup(1) to confirm their exact meaning).
USAGE='[-h] (count | len [min|max|avg]) [rev-list options]
Display commit statistics
Filter commits based on [rev-list options]'
SUBDIRECTORY_OK=true
# source git-sh-setup for some helpers
# nounset is switched off only while sourcing and restored right afterwards,
# presumably because git-sh-setup expands variables that may be unset.
set +u
source "$(git --exec-path)"/git-sh-setup
set -u
# Print one line per selected commit: "<hash><TAB><subject> <body> ".
# git emits a BEL marker line before each commit; awk then joins every
# line up to the next marker with single spaces, so a multi-line commit
# message ends up on one output line.
# Arguments: optional extra options, appended verbatim to `git log --all`
# Outputs:   the joined records on stdout (a lone empty line if git
#            printed nothing)
commits_list() {
  local bel=$'\a'
  # Local argv array; the previous global was named `command`, which leaked
  # into the caller's scope under the name of a shell builtin.
  local -a log_cmd=(
    git
    log
    # %x07 = BEL record marker on its own line, %x09 = TAB after the hash.
    # git's own hex escapes are used so that no control characters have to
    # be quoted into the format string.
    '--pretty=format:%x07%n%H%x09%s %b'
    # start somewhere
    --all
    # caller-supplied options; expands to nothing when there are none
    "$@"
  )
  "${log_cmd[@]}" |
    awk -v bel="$bel" '
      # Marker line: close the previous record. The very first marker has
      # nothing to close, and it is skipped so that the BEL character never
      # ends up in front of the first hash.
      index($0, bel) { if (NR != 1) printf "\n"; next }
      { printf "%s ", $0 }
      END { printf "\n" }
    '
}
# Print the number of selected commits, counted by git itself.
# Arguments: optional extra options, appended to `git rev-list --all --count`
commit_count() {
  local -a count_cmd=(git rev-list --all --count "$@")
  "${count_cmd[@]}"
}
# Print "<word count> <hash>" for every record produced by commits_list.
# Arguments: forwarded unchanged to commits_list
# Outputs:   one line per record on stdout
commit_len() {
  commits_list "$@" |
    awk -F'\t' '{
      # Field 1 is the hash; everything after the first tab is the message.
      # Words are counted in every remaining field, so a tab inside the
      # message body does not cut the count short.
      words = 0
      for (i = 2; i <= NF; i++) words += split($i, parts, " ")
      print words, $1
    }'
}
# Print the commit_len line with the smallest word count.
# Arguments: forwarded unchanged to commit_len
# Outputs:   the first line of the numerically sorted commit_len output
commit_len_min() {
  commit_len "$@" |
    sort -n |
    # Take the first line with a filter that reads its whole input.
    # `head -n 1` exits early, which kills `sort` with SIGPIPE on large
    # outputs and makes the pipeline fail under `set -o pipefail`.
    awk 'NR == 1'
}
# Print the commit_len line with the largest word count.
# Arguments: forwarded unchanged to commit_len
# Outputs:   the first line of the reverse-numerically sorted commit_len output
commit_len_max() {
  commit_len "$@" |
    sort -rn |
    # Take the first line with a filter that reads its whole input.
    # `head -n 1` exits early, which kills `sort` with SIGPIPE on large
    # outputs and makes the pipeline fail under `set -o pipefail`.
    awk 'NR == 1'
}
# Print the mean word count over all lines produced by commit_len.
# Arguments: forwarded unchanged to commit_len
# Outputs:   the average of the first column, or 0 when there is no input
commit_len_avg() {
  commit_len "$@" |
    awk '
      { sum += $1 }
      # Guard the division: with no input lines NR is 0.
      END { print (NR > 0 ? sum / NR : 0) }
    '
}
# Entry point: dispatch on the sub-command.
#   count [rev-list options]               -> commit_count
#   len [min|max|avg] [rev-list options]   -> commit_len / commit_len_<stat>
# Anything else (including no arguments at all) calls usage.
main() {
  if (($# == 0)); then
    usage
  fi
  if [[ "$1" == count ]]; then
    commit_count "${@:2}"
  elif [[ "$1" == len ]]; then
    # An optional statistic selector may follow "len"; every other word is
    # passed through as a rev-list option.
    case "${2-}" in
      max | min | avg) commit_len_"$2" "${@:3}" ;;
      *) commit_len "${@:2}" ;;
    esac
  else
    usage
  fi
}
main "$@"
来源:https://codereview.stackexchange.com/questions/226071
复制相似问题