首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >数组中唯一值的记录

数组中唯一值的记录
EN

Stack Overflow用户
提问于 2018-03-18 14:31:26
回答 1查看 81关注 0票数 0

我需要添加删除重复单词的代码,当检查时(例如,在文本中有几个单词' book‘,它将它们显示为在编译时输出的重复单词,如- book)。这本词典大约有14万字。请告诉我如何执行唯一性检查,以便将不相同的单词写入'uniq‘数组。

代码语言:javascript
复制
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <string.h>

#include "dictionary.h"
#undef calculate
#undef getrusage

// default dictionary
#define DICTIONARY "dictionaries/large"

// prototype
double calculate(const struct rusage* b, const struct rusage* a);

int main(int argc, char* argv[])
{
    // check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: speller [dictionary] text\n");
        return 1;
    }

    // structs for timing data
    struct rusage before, after;

    // benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // determine dictionary to use
    char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // calculate time to load dictionary
    time_load = calculate(&before, &after);

    // try to open text
    char* text = (argc == 3) ? argv[2] : argv[1];

    //The variable fp contains the literary text
    FILE* fp = fopen(text, "r");

    if (fp == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH+1];

    //uniq_array
    char uniq[300][50];

    //count no uniq words
    int countsUniq = 0;

    // spell-check each word in text
    for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
    {
        // allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // append character to word
            word[index] = c;
            index++;

            // ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // consume remainder of alphabetical string
                while ((c = fgetc(fp)) != EOF && isalpha(c));

                // prepare for new word
                index = 0;
            }
        }

        // ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // consume remainder of alphanumeric string
            while ((c = fgetc(fp)) != EOF && isalnum(c));

            // prepare for new word
            index = 0;
        }

        // we must have found a whole word
        else if (index > 0)
        {
            // terminate current word
            word[index] = '\0';

            // update counter
            words++;

            // check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // update benchmark
            time_check += calculate(&before, &after);


            // prepare for next word
            index = 0;
            // print word if misspelled
            if (misspelled)
            {
                //here takes place check for uniqueness and record to an array
                misspellings++;
                for(int j = 0; j < 300; j++){
                    if(strcmp(uniq[j], word) == 0){
                        countsUniq++;
                        break;
                    }
                }
                if(countsUniq == 0){
                    for(int i = 0; i < 300; i++){
                        if(strcmp(uniq[i], "") == 0){
                            strcpy(uniq[i], word);
                            break;
                        }
                    }
                }
                countsUniq = 0;
            }
        }
    }

    for(int i = 0; i < 300; i++){
       printf("%s\n", uniq[i]);
    }

    // check whether there was an error
    if (ferror(fp))
    {
        fclose(fp);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // close text
    fclose(fp);

    // determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n",
     time_load + time_check + time_size + time_unload);

    return 0;
}

/**
 * Returns number of seconds between b and a.
 */
double calculate(const struct rusage* b, const struct rusage* a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                 (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                 (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

在输出时,我收到了一些不应该到达的单词和符号的列表,有些行是空的,我也不知道为什么:

代码语言:javascript
复制
nonproprietary
s
F
IS'
MERCHANTIBILITY
unenforceability




Q@
<
=
@

提前谢谢你的帮助。

EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2018-03-18 15:31:40

压缩字符串通常是通过库函数strcmp完成的。字符串不能通过==运算符进行比较。

此外,=运算符不能分配字符串。

代码语言:javascript
复制
uniq[misspellings][j] = word[j]; // it will not work

使用strcpy复制字符串。

这是一个简单的程序来说明上述概念。可以帮助你理解你的问题。

代码语言:javascript
复制
#include <stdio.h>
#include <string.h>

#define LENGTH 30
#define ROWS    5

int main(void) {

    char word[LENGTH+1] = "word";
    char uniq[ROWS][LENGTH+1] = { "eva", "buba", "word" , "1235",  "stop"};

    for(int j = 0; j < ROWS; j++){

        if(strcmp(uniq[j], word) == 0) // find word
        {
            printf("We have found: <%s>\n", word);
            // replacing with "hello":
            strcpy(uniq[j], "hello");
        }
    }

    printf("Strings in uniq:\n", word);
    for(int j = 0; j < ROWS; j++){
        printf("%s ", uniq[j]);
    }

  return 0;
}

输出:

代码语言:javascript
复制
We have found: <word>                                                                                                                         
Strings in uniq:                                                                                                                              
eva buba hello 1235 stop 
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/49349035

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档