受这个问题 on Puzzling.StackExchange的启发,我决定编写一个小python脚本来确定一个单词是否是BFF单词。下面是通过困扰用户斯蒂夫解决的参数,在程序模块docstring中也进行了讨论:
如果你删除了两个字母,你就会留下一只普通的家养宠物(“男人最好的朋友”,因此也就是“最好的朋友”--永远的好朋友)。此外,非BFF的单词显示了一个不同的但相关的模式--如果你删除两个字母,你就会留下一只不是普通的家养宠物的动物的触角!
我想要改进的主要是性能。我使用requests检索大约475个英语单词的列表,以确定它们是否是BFF单词。然后,我使用itertools.permutations获取所有可能的英语单词组合,根据它被测试的宠物的长度,然后根据已经存在的普通/不常见动物列表来测试它。
经过大约15分钟的运行,程序仍然在ab...部分的英语单词。我认为花的时间最长的部分是产生排列,所以我想知道是否有更好的方法来做到这一点。
"""
A function that determines if a string is a BFF Word.
If you remove two letters, and the remaining letters are
an anagram for a household pet, then it's a BFF Word.
Example:
GOURD -> remove UR -> DOG = Common Pet
ALBINO -> remove AB -> LION = Uncommon Pet
HELLO -> remove any 2 -> None = Neither
"""
from enum import Enum
from itertools import permutations
import requests
REPOSITORY = "https://raw.githubusercontent.com/dwyl/english-words/master/words.txt"
COMMON_PET = ["dog", "cat", "lizard", "rabbit", "hamster", "fish"]
UNCOMMON_PET = ["bear", "rhino", "lion", "tiger", "viper", "hyena"]
class PetType(Enum):
COMMON = 1
UNCOMMON = 2
NEITHER = 3
def type_of_pet(word: str) -> PetType:
"""
Returns the type of pet that the passed word is.
"""
for pet in COMMON_PET:
for string in permutations(word, len(pet)):
if pet == ''.join(string):
return PetType.COMMON
for pet in UNCOMMON_PET:
for string in permutations(word, len(pet)):
if pet == ''.join(string):
return PetType.UNCOMMON
return PetType.NEITHER
def main():
req = requests.get(REPOSITORY)
if req.status_code == 200:
words = req.text.split()
for word in words:
print(f"Word: {word.lower()} | Type of Pet: {type_of_pet(word.lower())}")
if __name__ == "__main__":
main()发布于 2020-03-27 02:40:16
我能想到的最简单的提速就是一张数字母的通行证。换句话说:将collections.Counter()应用于所讨论的单词,并为这两种宠物类型保留一个预先计算好的Counters元组。
扼杀你的表现的是秩序--有很多,很多,很多来自permutations的重新排序的结果,但是它们实际上并不重要,因为你在处理一个字谜。所以当你和上面提到的Counters进行比较时,请检查一下
下面是一个非常粗略的实现,似乎是快速的:
from collections import Counter
import requests
class Pet:
__slots__ = ('name', 'counter', 'is_common', 'letters')
def __init__(self, name: str, is_common: bool):
self.name, self.is_common = name, is_common
self.counter = Counter(self.name)
self.letters = set(self.counter.keys())
def matches(self, word: str) -> bool:
if len(word) != 2 + len(self.name): return False
wcounter = Counter(word)
total = 0
for letter in self.letters | set(wcounter.keys()):
diff = wcounter[letter] - self.counter[letter]
if diff < 0: return False
total += diff
if total > 2: return False
return total == 2
def __str__(self): return self.name
pets = [
*(Pet(name, True) for name in ('dog', 'cat', 'lizard', 'rabbit', 'hamster', 'fish')),
*(Pet(name, False) for name in ('bear', 'rhino', 'lion', 'tiger', 'viper', 'hyena')),
]
print('Downloading...', end=' ')
resp = requests.get('https://github.com/dwyl/english-words/blob/master/words.txt?raw=true')
resp.raise_for_status()
words = resp.text.splitlines()
print(f'{len(words)} downloaded.')
for word in words:
for pet in pets:
if pet.matches(word.lower()):
print(f'{word} -> {pet} = {"Common" if pet.is_common else "Uncommon"} Pet')它可以通过使用多线程等来加速。
发布于 2020-03-27 06:59:49
不要寻找符合规则的单词!你已经知道规矩了。使用它生成BFF单词。也就是说,从一个普通的宠物开始,过滤掉所有不长两个字母或者没有普通宠物中所有字母的单词。结果是列出了这只宠物的BFF单词。非BFF单词使用相同的规则生成,但从不常见的宠物开始。大约125毫秒。
import random
from collections import Counter
COMMON_PET = ["dog", "cat", "lizard", "rabbit", "hamster", "fish"]
UNCOMMON_PET = ["bear", "rhino", "lion", "tiger", "viper", "hyena"]
def BFF_word(pet, word_list):
word_len = len(pet) + 2
count = {letter:pet.count(letter) for letter in pet}
# only keep words that have the right length, no punctuation,
# and the right numbers of letters, and also don't contain
# the common pet, e.g. 'rabbited' -> 'rabbit' (too easy).
BFFs = [word for word in word_list
if len(word) == word_len
if word.isalpha()
if all(count[letter]<=word.count(letter) for letter in count)
if pet not in word
]
# uncomment to see how many BFFs there are and the first few
#print(pet, len(BFFs), BFFs[:5])
return random.choice(BFFs)
def main():
# I just used a local word list
with open("/usr/share/dict/words") as f:
words = [line.strip().lower() for line in f]
print("BFF words")
for pet in COMMON_PET:
word = BFF_word(pet, words)
print(f'{word} -> {pet}')
print("non-BFF words")
for pet in UNCOMMON_PET:
word = BFF_word(pet, words)
print(f'{word} -> {pet}')
if __name__ == "__main__":
main()发布于 2020-03-28 03:15:38
现在来看看完全不同的东西: C中的一个实现。
这有点简单和愚蠢,对于所有意图和目的,它立即执行。它没有任何散列映射或散列集。它跟踪每个宠物,一个字母频率计数阵列是稀疏的-它技术上跟踪整个ASCII-扩展的范围,以提高效率。
这作出了一些明目张胆的假设:
words.txt已被下载#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#define FILENAME "words.txt"
typedef struct {
const bool common; // Is this a common pet?
const char *name; // Pet's name, capitalized
int len, // Length of pet's name, excluding null
*counts; // Array of letter frequencies
} Pet;
// Assume ASCII everywhere; this is the number of symbols whose frequency we count
#define N_COUNTS 256
// The size of a frequency-counting array for the above character set
#define N_COUNT_BYTES (N_COUNTS * sizeof(int))
// The number of bytes if we only care about counting the upper-case alphabet
#define BYTES_TO_Z ((1 + (int)'Z') * sizeof(int))
// The number of letters that the word must lose to get to the pet name
#define COUNT_DIFF 2
static Pet pets[] = {
{ true, "DOG" },
{ true, "CAT" },
{ true, "LIZARD" },
{ true, "RABBIT" },
{ true, "HAMSTER" },
{ true, "FISH" },
{ false, "BEAR" },
{ false, "RHINO" },
{ false, "LION" },
{ false, "TIGER" },
{ false, "VIPER" },
{ false, "HYENA" },
};
#define N_PETS (sizeof(pets)/sizeof(Pet))
static void init_pet(Pet *restrict pet) {
pet->len = strlen(pet->name);
pet->counts = aligned_alloc(16, BYTES_TO_Z);
if (!pet->counts) {
perror("Failed to allocate buffer");
exit(1);
}
memset(pet->counts, 0, BYTES_TO_Z);
for (int i = 0; i < pet->len; i++)
pet->counts[(uint8_t)pet->name[i]]++;
}
static bool compare(
const Pet *restrict p, // The pet whose name we will compare
const char *restrict word, // The dictionary word
int wlen, // Length of the dictionary word
int *restrict counts // Memory we use for count differences
) {
// The word must have more letters than the pet, in total
if (wlen != p->len + COUNT_DIFF)
return false;
memcpy(counts, p->counts, BYTES_TO_Z);
for (const char *w = word; *w; w++) {
// This difference is effectively:
// frequency of this letter in pet - frequency of this letter in word
// It starts off at the pet# and decreases.
// Its permissible range for a valid word is -COUNT_DIFF <= c <= 0.
int *c = counts + (uint8_t)*w;
(*c)--;
// Does the word have greater than COUNT_DIFF of this letter more than
// the pet name?
if (*c < -COUNT_DIFF)
return false;
}
// There cannot be any counts left over that are positive. Loop over the
// letters of the pet name, which in nearly all cases are unique; so this is
// more efficient than looping over the whole alphabet.
for (const char *c = p->name; *c; c++)
if (counts[(uint8_t)*c] > 0)
return false;
return true;
}
static char *read_file(const char **restrict end) {
int fdes = open(FILENAME, O_RDONLY);
if (fdes == -1) {
perror("Failed to open " FILENAME);
exit(1);
}
struct stat fs;
if (fstat(fdes, &fs) == -1) {
perror("Failed to get size of " FILENAME);
exit(1);
}
char *start = malloc(fs.st_size+1);
if (!start) {
perror("Failed to allocate dictionary buffer");
exit(1);
}
ssize_t nread = read(fdes, start, fs.st_size);
if (nread != fs.st_size) {
perror("Failed to read " FILENAME);
exit(1);
}
*end = start + fs.st_size;
start[fs.st_size] = '\0';
return start;
}
static int upper_and_len(char *restrict str) {
// Capitalize all letters, find non-printable newline
int n;
for (n = 0; str[n] >= ' '; n++)
if (str[n] >= 'a' && str[n] <= 'z')
str[n] &= ~('A' ^ 'a');
str[n] = '\0'; // Replace newline with null
return n; // Return length of string to the null
}
int main() {
for (Pet *p = pets; p < pets+N_PETS; p++)
init_pet(p);
int *counts = aligned_alloc(16, N_COUNT_BYTES);
if (!counts) {
perror("Failed to allocate working memory buffer");
exit(1);
}
const char *words_end;
int wlen;
for (char *word = read_file(&words_end); word < words_end; word += wlen + 1) {
wlen = upper_and_len(word);
for (Pet *p = pets; p < pets+N_PETS; p++)
if (compare(p, word, wlen, counts))
printf("%s -> %s = %s Pet\n",
word, p->name, p->common ? "Common" : "Uncommon");
}
return 0;
}https://codereview.stackexchange.com/questions/239477
复制相似问题