首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >为什么我的trie泄露数据?

为什么我的trie泄露数据?
EN

Stack Overflow用户
提问于 2016-10-09 22:19:29
回答 1查看 129关注 0票数 0

我试着用trie做一个拼写检查器,但是我试图添加到我的trie中的单词似乎没有被加入。泄密处在哪里?

我花了几个小时使用调试器并逐步完成我的代码。

主要职能:

代码语言:javascript
复制
/**
 * Implements a spell-checker.
 */

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"
#undef calculate
#undef getrusage

// default dictionary
#define DICTIONARY "dictionaries/large"

// prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: speller [dictionary] text\n");
        return 1;
    }

    // structs for timing data
    struct rusage before, after;

    // benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // determine dictionary to use
    char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // calculate time to load dictionary
    time_load = calculate(&before, &after);

    // try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *fp = fopen(text, "r");
    if (fp == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH+1];

    // spell-check each word in text
    for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
    {
        // allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // append character to word
            word[index] = c;
            index++;

            // ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // consume remainder of alphabetical string
                while ((c = fgetc(fp)) != EOF && isalpha(c));

                // prepare for new word
                index = 0;
            }
        }

        // ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // consume remainder of alphanumeric string
            while ((c = fgetc(fp)) != EOF && isalnum(c));

            // prepare for new word
            index = 0;
        }

        // we must have found a whole word
        else if (index > 0)
        {
            // terminate current word
            word[index] = '\0';

            // update counter
            words++;

            // check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // update benchmark
            time_check += calculate(&before, &after);

            // print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // prepare for next word
            index = 0;
        }
    }

    // check whether there was an error
    if (ferror(fp))
    {
        fclose(fp);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // close text
    fclose(fp);

    // determine dictionary'size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n", 
     time_load + time_check + time_size + time_unload);

    // that's all folks
    return 0;
}

/**
 * Returns number of seconds between b and a.
 */
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                 (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                 (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

检查trie中是否有一个单词的函数:

代码语言:javascript
复制
/**
 * Implements a dictionary's functionality.
 */

#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>

#include "dictionary.h"

int dictionary_size;

bool find(char *word, node *next)
{
    word++;
    if (next == NULL)
    {
        return false;
    }
    else if (word[0] == '\0')
    {
        if (next->is_word == true)
        {
            return true;
        }
        else
        {
            return false;
        }
    }
    else
    {
        //go to this nodes chold that is now eual to the first letter of the word
        if (word[0] == '\'')
        {
            return find(word, next->children[26]);
        }
        else
        {
            return find(word, next->children[word[0] - 'a']);
        }
    }
}

/**
 * Returns true if word is in dictionary else false.
 */
bool check(const char *word)
{
    //get word from text and make it malluable
    char w[47];
    memcpy(w, word, strlen(word) + 1);
    //make sure it's all lower case
    for (int i = 0; i < strlen(word); i++)
    {
        w[i] = tolower(w[i]);
    }
    //go to the root child node equal to the first letter of the word
    if (find(w, root))
    {
        return true;
    }
    else
    {
        return false;
    }
}

将单词字典加载到trie中的函数。(我猜这就是泄密的地方):

代码语言:javascript
复制
struct node *newNode(char *word, node *next)
{
    next = malloc(sizeof(node));
    word++;
    if (word[0] == '\0')
    {
        next->is_word = true;
        return next;
    }
    else
    {
        node *new_node = NULL;
        // go to this nodes choild that is now equal to the first letter of the word
        if (word[0] == '\'')
        {
           return next->children[26] = newNode(word, new_node);
        }
        else
        { 
           return next->children[word[0] - 97] = newNode(word, new_node);
        }
    }
}


/**
 * Loads dictionary into memory. Returns true if successful else false.
 */
bool load(const char *dictionary)
{
    dictionary_size = 0;
    FILE *dic = fopen(dictionary, "r");
    if (dic == NULL)
    {
        return false;
    }
    // Initalize root node
    root = malloc(sizeof(node));

    //get word from dictinary
    int ch;
    while ((ch = fgetc(dic)) != EOF)
    {
        char word[47];
        fgets(word, 47, dic);
        dictionary_size++;
        // make sure it's all lower case
        for (int i = 0; i < 47; i++)
        {
            word[i] = tolower(word[i]);
        }
        // get rid of new line char
        char *pos;
        if ((pos = strchr(word, '\n')) != NULL)
        {
            *pos = '\0';
        }
        printf("%s\n", word);
        //go to root nodes child that is equal to the first letter of the word
        node *child_node = NULL;
        root->children[word[0] - 'a'] = newNode(word, child_node);
    }
return true;
}

/**
 * Returns number of words in dictionary if loaded else 0 if not yet loaded.
 */
unsigned int size(void)
{
    if (dictionary_size != 0)
    {
        return dictionary_size;
    }
    return 0;
}

void free_node(node *next)
{
    // safety including root node
    if(!next) return;   

    // takes you to end of trie
    for (int i = 0; i < 26; i++)
    {
       free_node(next->children[i]);
    }

    // base case
    free(next);
}

/**
 * Unloads dictionary from memory. Returns true if successful else false.
 */
bool unload(void)
{
    free_node(root);
    return true;
}

定义节点结构:

代码语言:javascript
复制
typedef struct node
{
    bool is_word;
    struct node *children[27]; 
}
node;

node *root;
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2016-10-10 00:04:57

此代码使用彻底重写的addWord()函数代替addNode()

代码语言:javascript
复制
/* SO 3994-9116 */
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct node
{
    bool is_word;
    struct node *children[27];
} node;

static node *root = 0;
static int dictionary_size = 0;

static void addWord(char *word, node *trie)
{
    assert(islower((unsigned char)word[0]) || word[0] == '\0');
    if (word[0] == '\0')
        trie->is_word = true;
    else
    {
        int code = word[0] - 'a';
        if (trie->children[code] == 0)
        {
            trie->children[code] = calloc(sizeof(node), 1);
            if (trie->children[code] == 0)
            {
                fprintf(stderr, "Memory allocation failed\n");
                exit(1);
            }
        }
        addWord(&word[1], trie->children[code]);
    }
}

static
bool load(const char *dictionary)
{
    dictionary_size = 0;
    FILE *dic = fopen(dictionary, "r");
    if (dic == NULL)
        return false;
    root = calloc(1, sizeof(node));
    if (root == 0)
    {
        fprintf(stderr, "Memory allocation failed\n");
        exit(1);
    }

    char word[47];
    while (fgets(word, sizeof(word), dic) != 0)
    {
        char *pos;
        if ((pos = strchr(word, '\n')) != NULL)
            *pos = '\0';
        dictionary_size++;
        for (int i = 0, len = strlen(word); i < len; i++)
        {
            if (!isalpha((unsigned char)word[i]))
                fprintf(stderr, "Non-alphabetic character in '%s'\n", word);
            else
                word[i] = tolower((unsigned char)word[i]);
        }
        addWord(word, root);
    }
    return true;
}

static void print_subtrie(const char *prefix, char nchar, node *child)
{
    if (child != 0)
    {
        int len = strlen(prefix);
        char buffer[len + 2];
        strcpy(buffer, prefix);
        buffer[len] = nchar;
        buffer[len+1] = '\0';
        if (child->is_word)
            printf("%s\n", buffer);
        for (int i = 0; i < 26; i++)
            print_subtrie(buffer, 'a' + i, child->children[i]);
    }
}

static void print_trie(node *top)
{
    for (int i = 0; i < 26; i++)
        print_subtrie("", 'a' + i, top->children[i]);
}

static void free_trie(node *trie)
{
    if (trie != 0)
    {
        for (int i = 0; i < 27; i++)
        {
            if (trie->children[i] != 0)
                free_trie(trie->children[i]);
        }
        free(trie);
    }
}

int main(void)
{
    if (load("words.list"))
    {
        printf("Nominal dictionary size: %d\n", dictionary_size);
        print_trie(root);
        free_trie(root);
    }
    return 0;
}

测试文件1:

代码语言:javascript
复制
aardvark
abelone
assignation
assignment
assigned
assert
assertion

输出文件1:

代码语言:javascript
复制
Nominal dictionary size: 7
aardvark
abelone
assert
assertion
assignation
assigned
assignment

测试文件2:

代码语言:javascript
复制
Mike
Aquarius
delta
assert
zulu
assertion
alpha
aardvark
abelone
culmination
Zulu
kilo
amniocentesis
Oscar
Yankee
cabal
echo
Sierra
duck
Charlie
Quebec
centre
assigned
Papa
foxtrot
uMBRelLA
Romeo
cab
India
Juliet
bravo
amniocentesis
hotel
amniocentesis
Lima
Whisky
cab
culminate
cold
Xray
cab
cabs
assignation
cam
Victor
golf
Tango
November
cinema
cabbie
assignment

输出文件2:

代码语言:javascript
复制
Nominal dictionary size: 56
a
aardvark
abelone
alpha
amniocentesis
aquarius
as
assert
assertion
assign
assignation
assigned
assignment
bravo
cab
cabal
cabbie
cabs
cam
centre
charlie
cinema
cold
culminate
culmination
delta
duck
echo
foxtrot
golf
hotel
i
india
juliet
kilo
lima
mike
november
o
oscar
papa
quebec
romeo
sierra
tango
umbrella
victor
whisky
xray
yankee
zulu

注意,56的“标称大小”太大了;输入和输出中有46个不同的单词。

票数 2
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/39949116

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档