首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >BFF寻字器

BFF寻字器
EN

Code Review用户
提问于 2020-03-27 01:20:28
回答 3查看 113关注 0票数 3

这个问题 on Puzzling.StackExchange的启发,我决定编写一个小python脚本来确定一个单词是否是BFF单词。下面是通过困扰用户斯蒂夫解决的参数,在程序模块docstring中也进行了讨论:

如果你删除了两个字母,你就会留下一只普通的家养宠物(“男人最好的朋友”,因此也就是“最好的朋友”--永远的好朋友)。此外,非BFF的单词显示了一个不同的但相关的模式--如果你删除两个字母,你就会留下一只不是普通的家养宠物的动物的触角!

我想要改进的主要是性能。我使用requests检索大约475个英语单词的列表,以确定它们是否是BFF单词。然后,我使用itertools.permutations获取所有可能的英语单词组合,根据它被测试的宠物的长度,然后根据已经存在的普通/不常见动物列表来测试它。

经过大约15分钟的运行,程序仍然在ab...部分的英语单词。我认为花的时间最长的部分是产生排列,所以我想知道是否有更好的方法来做到这一点。

script.py

代码语言:javascript
复制
"""
A function that determines if a string is a BFF Word.

If you remove two letters, and the remaining letters are
an anagram for a household pet, then it's a BFF Word.

Example:

GOURD -> remove UR -> DOG = Common Pet
ALBINO -> remove AB -> LION = Uncommon Pet
HELLO -> remove any 2 -> None = Neither
"""

from enum import Enum
from itertools import permutations
import requests

REPOSITORY = "https://raw.githubusercontent.com/dwyl/english-words/master/words.txt"

COMMON_PET = ["dog", "cat", "lizard", "rabbit", "hamster", "fish"]
UNCOMMON_PET = ["bear", "rhino", "lion", "tiger", "viper", "hyena"]

class PetType(Enum):
    COMMON = 1
    UNCOMMON = 2
    NEITHER = 3

def type_of_pet(word: str) -> PetType:
    """
    Returns the type of pet that the passed word is.
    """
    for pet in COMMON_PET:
        for string in permutations(word, len(pet)):
            if pet == ''.join(string):
                return PetType.COMMON

    for pet in UNCOMMON_PET:
        for string in permutations(word, len(pet)):
            if pet == ''.join(string):
                return PetType.UNCOMMON

    return PetType.NEITHER

def main():
    req = requests.get(REPOSITORY)
    if req.status_code == 200:
        words = req.text.split()
    for word in words:
        print(f"Word: {word.lower()} | Type of Pet: {type_of_pet(word.lower())}")


if __name__ == "__main__":
    main()
EN

回答 3

Code Review用户

回答已采纳

发布于 2020-03-27 02:40:16

我能想到的最简单的提速就是一张数字母的通行证。换句话说:将collections.Counter()应用于所讨论的单词,并为这两种宠物类型保留一个预先计算好的Counters元组。

扼杀你的表现的是秩序--有很多,很多,很多来自permutations的重新排序的结果,但是它们实际上并不重要,因为你在处理一个字谜。所以当你和上面提到的Counters进行比较时,请检查一下

  • 没有增加的字母,而且
  • 总共减少了两倍。

下面是一个非常粗略的实现,似乎是快速的:

代码语言:javascript
复制
from collections import Counter
import requests


class Pet:
    __slots__ = ('name', 'counter', 'is_common', 'letters')
    def __init__(self, name: str, is_common: bool):
        self.name, self.is_common = name, is_common
        self.counter = Counter(self.name)
        self.letters = set(self.counter.keys())

    def matches(self, word: str) -> bool:
        if len(word) != 2 + len(self.name): return False
        wcounter = Counter(word)
        total = 0
        for letter in self.letters | set(wcounter.keys()):
            diff = wcounter[letter] - self.counter[letter]
            if diff < 0: return False
            total += diff
            if total > 2: return False
        return total == 2

    def __str__(self): return self.name


pets = [
    *(Pet(name, True) for name in ('dog', 'cat', 'lizard', 'rabbit', 'hamster', 'fish')),
    *(Pet(name, False) for name in ('bear', 'rhino', 'lion', 'tiger', 'viper', 'hyena')),
]

print('Downloading...', end=' ')
resp = requests.get('https://github.com/dwyl/english-words/blob/master/words.txt?raw=true')
resp.raise_for_status()
words = resp.text.splitlines()
print(f'{len(words)} downloaded.')

for word in words:
    for pet in pets:
        if pet.matches(word.lower()):
            print(f'{word} -> {pet} = {"Common" if pet.is_common else "Uncommon"} Pet')

它可以通过使用多线程等来加速。

票数 4
EN

Code Review用户

发布于 2020-03-27 06:59:49

使用规则

不要寻找符合规则的单词!你已经知道规矩了。使用它生成BFF单词。也就是说,从一个普通的宠物开始,过滤掉所有不长两个字母或者没有普通宠物中所有字母的单词。结果是列出了这只宠物的BFF单词。非BFF单词使用相同的规则生成,但从不常见的宠物开始。大约125毫秒。

代码语言:javascript
复制
import random
from collections import Counter

COMMON_PET = ["dog", "cat", "lizard", "rabbit", "hamster", "fish"]
UNCOMMON_PET = ["bear", "rhino", "lion", "tiger", "viper", "hyena"]

def BFF_word(pet, word_list):
    word_len = len(pet) + 2
    count = {letter:pet.count(letter) for letter in pet}

# only keep words that have the right length, no punctuation,
# and the right numbers of letters, and also don't contain
# the common pet, e.g. 'rabbited' -> 'rabbit' (too easy). 
BFFs = [word for word in word_list
            if len(word) == word_len
            if word.isalpha()
            if all(count[letter]<=word.count(letter) for letter in count)
            if pet not in word
            ]


    # uncomment to see how many BFFs there are and the first few
    #print(pet, len(BFFs), BFFs[:5])

    return random.choice(BFFs)

    
def main():
    # I just used a local word list
    with open("/usr/share/dict/words") as f:
        words = [line.strip().lower() for line in f]

    print("BFF words")
    for pet in COMMON_PET:
        word = BFF_word(pet, words)
        print(f'{word} -> {pet}')

    print("non-BFF words")
    for pet in UNCOMMON_PET:
        word = BFF_word(pet, words)
        print(f'{word} -> {pet}')

if __name__ == "__main__":
    main()
票数 3
EN

Code Review用户

发布于 2020-03-28 03:15:38

现在来看看完全不同的东西: C中的一个实现。

这有点简单和愚蠢,对于所有意图和目的,它立即执行。它没有任何散列映射或散列集。它跟踪每个宠物,一个字母频率计数阵列是稀疏的-它技术上跟踪整个ASCII-扩展的范围,以提高效率。

这作出了一些明目张胆的假设:

  • 区域设置被忽略
  • 字母大小写被忽略
  • 假定words.txt已被下载
  • 由于文件调用,这可能只与类Unix操作系统兼容。
  • 标点符号、空格等算作“可移除的字符”,以满足标准。
代码语言:javascript
复制
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

#define FILENAME "words.txt"

typedef struct {
    const bool common; // Is this a common pet?
    const char *name;  // Pet's name, capitalized
    int len,           // Length of pet's name, excluding null
        *counts;       // Array of letter frequencies
} Pet;

// Assume ASCII everywhere; this is the number of symbols whose frequency we count
#define N_COUNTS 256
// The size of a frequency-counting array for the above character set
#define N_COUNT_BYTES (N_COUNTS * sizeof(int))
// The number of bytes if we only care about counting the upper-case alphabet
#define BYTES_TO_Z ((1 + (int)'Z') * sizeof(int))
// The number of letters that the word must lose to get to the pet name
#define COUNT_DIFF 2

static Pet pets[] = {
    { true, "DOG"     },
    { true, "CAT"     },
    { true, "LIZARD"  },
    { true, "RABBIT"  },
    { true, "HAMSTER" },
    { true, "FISH"    },
    { false, "BEAR"  },
    { false, "RHINO" },
    { false, "LION"  },
    { false, "TIGER" },
    { false, "VIPER" },
    { false, "HYENA" },
};
#define N_PETS (sizeof(pets)/sizeof(Pet))

static void init_pet(Pet *restrict pet) {
    pet->len = strlen(pet->name);

    pet->counts = aligned_alloc(16, BYTES_TO_Z);
    if (!pet->counts) {
        perror("Failed to allocate buffer");
        exit(1);
    }
    memset(pet->counts, 0, BYTES_TO_Z);
    for (int i = 0; i < pet->len; i++)
        pet->counts[(uint8_t)pet->name[i]]++;
}

static bool compare(
    const Pet *restrict p,     // The pet whose name we will compare
    const char *restrict word, // The dictionary word
    int wlen,                  // Length of the dictionary word
    int *restrict counts       // Memory we use for count differences
 ) {
    // The word must have more letters than the pet, in total
    if (wlen != p->len + COUNT_DIFF)
        return false;

    memcpy(counts, p->counts, BYTES_TO_Z);

    for (const char *w = word; *w; w++) {
        // This difference is effectively:
        // frequency of this letter in pet - frequency of this letter in word
        // It starts off at the pet# and decreases.
        // Its permissible range for a valid word is -COUNT_DIFF <= c <= 0.
        int *c = counts + (uint8_t)*w;
        (*c)--;
        // Does the word have greater than COUNT_DIFF of this letter more than
        // the pet name?
        if (*c < -COUNT_DIFF)
            return false;
    }

    // There cannot be any counts left over that are positive. Loop over the
    // letters of the pet name, which in nearly all cases are unique; so this is
    // more efficient than looping over the whole alphabet.
    for (const char *c = p->name; *c; c++)
        if (counts[(uint8_t)*c] > 0)
            return false;

    return true;
}

static char *read_file(const char **restrict end) {
    int fdes = open(FILENAME, O_RDONLY);
    if (fdes == -1) {
        perror("Failed to open " FILENAME);
        exit(1);
    }
    struct stat fs;
    if (fstat(fdes, &fs) == -1) {
        perror("Failed to get size of " FILENAME);
        exit(1);
    }

    char *start = malloc(fs.st_size+1);
    if (!start) {
        perror("Failed to allocate dictionary buffer");
        exit(1);
    }

    ssize_t nread = read(fdes, start, fs.st_size);
    if (nread != fs.st_size) {
        perror("Failed to read " FILENAME);
        exit(1);
    }

    *end = start + fs.st_size;
    start[fs.st_size] = '\0';
    return start;
}

static int upper_and_len(char *restrict str) {
    // Capitalize all letters, find non-printable newline
    int n;
    for (n = 0; str[n] >= ' '; n++)
        if (str[n] >= 'a' && str[n] <= 'z')
            str[n] &= ~('A' ^ 'a');
    str[n] = '\0'; // Replace newline with null
    return n;      // Return length of string to the null
}

int main() {
    for (Pet *p = pets; p < pets+N_PETS; p++)
        init_pet(p);

    int *counts = aligned_alloc(16, N_COUNT_BYTES);
    if (!counts) {
        perror("Failed to allocate working memory buffer");
        exit(1);
    }

    const char *words_end;
    int wlen;
    for (char *word = read_file(&words_end); word < words_end; word += wlen + 1) {
        wlen = upper_and_len(word);
        for (Pet *p = pets; p < pets+N_PETS; p++)
            if (compare(p, word, wlen, counts))
                    printf("%s -> %s = %s Pet\n",
                        word, p->name, p->common ? "Common" : "Uncommon");
    }

    return 0;
}
票数 3
EN
页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://codereview.stackexchange.com/questions/239477

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档