文章/答案/技术大牛

发布

社区首页 >问答首页 >C文本格式化程序字符计数已关闭

问C文本格式化程序的字符计数有偏差
EN

Stack Overflow用户

提问于 2022-05-11 15:09:16

回答 2查看 83关注 0票数 1

因此，我有一个根据规则格式化文本的赋值，并最终打印出字符串中的字符数(包括\n和空格，除了字符串末尾的\0 )。

基本上，一个有效的输入以一个点结尾，但我认为在最后的点之后还有几个空格。我尝试过几种方法，比如循环，从字符串末尾开始用\0替换空格。但似乎什么都不管用..。

编辑需求如下：

将双点(..)转换为换行，
删除多余的空格，只保留一个空格，
确保逗号或点之前没有空格，并确保逗号或点之后有单个空格，
不更改撇号(apostrophes，即引号)之间的原始内容，并(and)确保大写字母只出现在正确的位置(新行/新段落的开头)。

我们需要把所有代码都写在main函数中(很不幸)，而我的代码算出的字符数通常会偏差1-2个(可能是因为最后一个点之后的多余空格也被计入了)。

这是一个输入示例，我的代码在计算字符时失败了。

the LANGUAGE  "C" is a procedural              programming language     .It was initially developed by "Dennis Ritchie"..            the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style                .

/*
 * Text cleaner exactly as posted in the question (the listing stops before
 * main's closing brace). Everything happens inside main: read one line,
 * rewrite it in place in tin through a series of strchr-driven passes,
 * build the case-corrected result in tout, then print it together with
 * character, word, sentence and paragraph counts. The whole sequence
 * repeats while the user answers 'y'.
 *
 * NOTE(review): several passes below evaluate their fix-up condition in
 * the loop header (`while (dex != NULL && ...)`), so they end at the first
 * match that needs no fixing instead of scanning the whole string.
 */
int main() {
    char ans;

    printf("*************** Welcome to the text cleaner ***************\n\n");

    do
    {
        // j counts the spaces removed by the multiple-space pass
        int length, i, j = 0;
        // statistics counters; space is incremented below but never printed
        int word, sentence, para, space;
        // tin: working copy of the input, tout: final output, dex/pos: strchr cursors
        char tin[601], tout[601], * dex, * pos;
        printf("\nPlease enter text:\n");
        gets_s(tin, 600);
        length = strlen(tin);

        dex = strchr(tin, '.'); //converts double dots to new line (the second '.' of a pair becomes '\n')
        while (dex != NULL)
        {
            if (tin[dex - tin + 1] == '.') {
                tin[dex - tin + 1] = '\n';
            }
            dex = strchr(dex + 1, '.');
        }
        length = strlen(tin);


        dex = strchr(tin, ' '); //converting multiple spaces to single space
        while (dex != NULL)
        {
            while (dex != NULL && tin[dex - tin + 1] == ' ')
            {
                // shift the tail one position left; the loop stops before the
                // terminator, so the string is not shortened here -- it is cut
                // once after the pass using the removal count j
                for (i = dex - tin + 1; i < strlen(tin); i++)
                {
                    tin[i - 1] = tin[i];
                }
                dex = strchr(dex, ' ');
                j++;
            }
            dex = strchr(dex + 1, ' ');
        }

        // drop the j leftover characters at the end of the string
        tin[length - j] = '\0';
        j = 0;

        dex = strchr(tin, '\n');

        // NOTE(review): the loop ends at the first newline that is not followed by a space
        while (dex != NULL && tin[dex-tin+1] == ' ') //deletes spaces in the beginning of new row
        {
            // this shift also copies the terminator, so the string shrinks by one
            for (i = dex - tin + 1;i < strlen(tin);i++) {
                tin[i] = tin[i + 1];
            }
            dex = strchr(dex + 1, '\n');
        }

        dex = strchr(tin, ','); //deletes space before comma
        // NOTE(review): reads tin[-1] if the comma is the first character, and
        // the loop ends at the first comma that is not preceded by a space
        while (dex != NULL && tin[dex - tin - 1] == ' ')
        {
            for (i = dex - tin - 1; i < strlen(tin); i++)
            {
                tin[i] = tin[i+1];
            }
            dex = strchr(dex+1, ',');
        }

        dex = strchr(tin, '.'); //deletes space before dots
        // NOTE(review): same header condition as the comma pass above -- ends at
        // the first dot that is not preceded by a space
        while (dex != NULL && tin[dex - tin - 1] == ' ')
        {
            for (i = dex - tin - 1; i < strlen(tin); i++)
            {
                tin[i] = tin[i+1];
            }
            dex = strchr(dex + 1, '.');
        }



        dex = strchr(tin, ','); // adds space after comma
        // NOTE(review): when the comma is followed by '\n' the if-body is skipped
        // and dex is never advanced, so the loop condition cannot change
        while (dex != NULL && tin[dex - tin + 1] != ' ')    
        {
            if (tin[dex - tin + 1] != '\n')
            {
                // place a terminator one byte further right, because the shift
                // below overwrites the current one; no check against sizeof(tin)
                tin[strlen(tin) + 1] = '\0';
                // shift the tail one position right and put ' ' right after the comma
                for (i = strlen(tin); i > dex - tin; i--)
                {
                    if (i == dex - tin + 1)
                    {
                        tin[i] = ' ';
                    }
                    else
                    {
                        tin[i] = tin[i - 1];
                    }
                }
                dex = strchr(dex + 1, ',');
            }
        }

        dex = strchr(tin, '.'); // adds space after dot

        // NOTE(review): a '.' that is the last character is followed by '\0',
        // which is neither ' ' nor '\n', so it takes the else branch and gets a
        // space appended after it
        while (dex != NULL && tin[dex - tin + 1] != ' ')
        {
                tin[strlen(tin) + 1] = '\0';
                if (tin[dex - tin + 1] == '\n')
                {
                    // dot already ends a line: leave it and move on
                    dex = strchr(dex + 1, '.');
                }
                else
                {
                    for (i = strlen(tin); i > dex - tin; i--)
                    {
                        if (i == dex - tin + 1)
                        {
                            tin[i] = ' ';
                        }
                        else
                        {
                            tin[i] = tin[i - 1];
                        }
                    }
                    dex = strchr(dex + 1, '.');
                }           
        }

        strcpy_s(tout, sizeof(tout), tin);
        _strlwr_s(tout,sizeof(tout)); //copies and lowercases the input string

        // dex: opening quote, pos: matching closing quote
        dex = strchr(tin, '"');
        
        if (dex != NULL) {
            pos = strchr(dex + 1, '"');

            // NOTE(review): pos is used without a NULL check, so an unmatched
            // opening quote leads to arithmetic on a null pointer
            while (dex != NULL)
            {
                for (i = dex - tin; i < pos - tin; i++) {
                    tout[i] = tin[i];
                }
                dex = strchr(pos + 1, '"');
                if (dex)
                {
                    pos = strchr(dex + 1, '"');
                }
            }  //making sure that the letters in the quotes haven't been lowercased
        }
        
        // tin now serves as the all-uppercase source for capital letters
        _strupr_s(tin, sizeof(tin));

        dex = strchr(tout, '.');
        pos = strchr(tin, '.');
        while (dex != NULL && pos != NULL)
        {
                // take the character two positions after each dot from the
                // uppercased copy (presumably the first letter of the next sentence)
                tout[dex - tout + 2] = tin[pos - tin + 2];
                dex = strchr(dex + 1, '.');     
                pos = strchr(pos + 1, '.');
        }   
        //CAPSLOCK

        dex = strchr(tout, '.'); //deletes space before dots (this pass checks every dot)
        while (dex != NULL)
        {
            if (tout[dex - tout - 1] == ' ')
            {
                for (i = dex - tout - 1; i < strlen(tout); i++)
                {
                    tout[i] = tout[i+1];
                }
            }
            
            dex = strchr(dex + 1, '.');
        }

        if (tout[0] == ' ') {
            for (i = 0 ;i < strlen(tout); i++) {
                tout[i] = tout[i + 1];
            }
        }//handling single space in the beginning of the string

        if (tout[0] >= 'a' && tout[0] <= 'z') {
            tout[0] -= 32;
        } //First letter always capital


        word = 0;
        sentence = 0;
        para = 1;
        space = 0;
        // the reported character count is the length of the cleaned string
        length = strlen(tout);

        // one word per space that is not followed by another space
        for (i = 0; tout[i] != '\0';i++)
        {
            if (tout[i] == ' ' && tout[i + 1] != ' ')
                word++;
        }



        // one sentence per dot
        dex = strchr(tout, '.');
        while (dex != NULL)
        {
            sentence++;
            dex = strchr(dex + 1, '.');
        }

        // every newline adds a paragraph and a word
        dex = strchr(tout, '\n');
        while (dex != NULL)
        {
            space++;
            para++;
            word++;
            dex = strchr(dex + 1, '\n');
        } 

        // disabled: counting an extra word per '-'
        //dex = strchr(tout, '-');
        //while (dex != NULL)
        //{
        //  word++;
        //  dex = strchr(dex + 1, '-');
        //}

        printf_s("\nText after cleaning:\n------------------------------------------------------------------------------------------------\n");
        printf_s("%s\n\n", tout);
        printf_s("characters: %d | words: %d | sentences: %d | paragraphs: %d\n------------------------------------------------------------------------------------------------\n",length, word, sentence, para);
        printf_s("\nIf you want to clean another string press (y): ");
        scanf_s(" %c", &ans, 1);
        if (ans == 'y')
        {
            // presumably consumes the rest of the answer line left behind by scanf_s
            gets_s(tin, 600);

        }
    } while (ans =='y');

string

loops

whitespace

回答 2

Stack Overflow用户

回答已采纳

发布于 2022-05-11 21:45:48

正如我在最上面的注释中提到的，这可以在一个带有状态变量的循环中完成。

一些假设：

每当我们看到双点(它被转换为换行符)时，它就会开始一个新的段落

引号(Quotes)--您称之为“撇号”，我称之为双引号(因为这是唯一有意义的事情)。引号中的文本保持原样，不做转换(not converted)。

不幸的是，我不得不完全重构代码。这是附加说明的。我知道你只需要使用main。额外的函数仅用于调试，因此它们“不算”：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

int opt_d;                          // debug: toggled by -d, enables dbgprt output in DEBUG builds
int opt_quo;                        // preserve quote: when non-zero, '"' characters are copied to the output

// dbgprt(fmt, ...) -- printf-style trace. Compiled in only when DEBUG is
// non-zero and printed only when opt_d is set; expands to nothing otherwise.
// Written with the standard C99 __VA_ARGS__ form instead of the GNU-only
// "name..." variadic syntax.
#if DEBUG
#define dbgprt(...) \
    do { \
        if (opt_d) \
            printf(__VA_ARGS__); \
    } while (0)
#else
#define dbgprt(...)             do { } while (0)
#endif

// COPYX(c) -- append character c at the output cursor and advance it.
// Expects a "char *dst" in the scope where it is expanded. The argument is
// evaluated exactly once, so expressions with side effects are safe.
#define COPYX(chr_) \
    do { \
        const int copyx_chr_ = (chr_); \
        *dst++ = (char) copyx_chr_; \
        dbgprt("COPY %2.2X/%s\n",(unsigned) copyx_chr_,showchr(copyx_chr_)); \
    } while (0)

// COPY -- append the current input character; expects "chr" in scope.
#define COPY \
    COPYX(chr)

// WHITEOUT -- if whitespace is pending (white != 0), emit a single space,
// clear the pending flag and count one more word. Expects "white" and
// "ctr_word" in scope. The break leaves the do/while(0) wrapper early.
#define WHITEOUT \
    do { \
        if (! white) \
            break; \
        COPYX(' '); \
        white = 0; \
        ctr_word += 1; \
    } while (0)

/*
 * showchr -- render one character for diagnostic output.
 *
 * Printable ASCII (0x20..0x7E) is returned as the character itself; any
 * other value is returned as two hex digits in braces, e.g. "{0A}".
 *
 * chr is reduced to a single byte first. Callers pass plain char values,
 * which arrive sign-extended (negative) for bytes >= 0x80 where char is
 * signed; formatting such a value directly would yield "{FFFFFF80}" and
 * overrun the 10-byte buffer.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.
 */
const char *
showchr(int chr)
{
    static char buf[10];
    unsigned int byte = (unsigned char) chr;

    if ((byte >= 0x20) && (byte <= 0x7E))
        snprintf(buf,sizeof(buf),"%c",(int) byte);
    else
        snprintf(buf,sizeof(buf),"{%2.2X}",byte);

    return buf;
}

/*
 * showbuf -- print "who: '<contents>'" followed by a newline, with every
 * character of buf rendered through showchr().
 */
void
showbuf(const char *buf,const char *who)
{
    const char *quote = "'";
    const char *cursor = buf;

    // label and opening quote
    printf("%s: %s",who,quote);

    // one showchr() rendering per character, up to the terminator
    while (*cursor != 0) {
        int cur = *cursor;

        cursor += 1;
        printf("%s",showchr(cur));
    }

    // closing quote
    printf("%s\n",quote);
}

/*
 * main -- clean up every line of a text file and print statistics for it.
 *
 * Usage: prog [-d] [-q] [file]
 *   -d    toggle opt_d (trace output, effective only in DEBUG builds)
 *   -q    toggle opt_quo; quote characters are kept unless -q is given
 *   file  input file; "inp.txt" when omitted
 *
 * Each line is rewritten in place in buf by a single pass driven by three
 * state flags (quo, white, dot). For every line the original and cleaned
 * text are shown with showbuf(), followed by a TOTAL line with the cleaned
 * length and the sentence, paragraph and word counts.
 *
 * Returns 0; exits with status 1 if the input file cannot be opened.
 */
int
main(int argc,char **argv)
{
    char inp[1000];
    char buf[1000];
    char *src;
    char *dst;

    // skip the program name
    --argc;
    ++argv;

    // leading arguments that start with '-' are options; the option letter
    // is the character right after the dash
    for (;  argc > 0;  --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        default:
            // unknown option letters are ignored
            break;
        }
    }

    // inverted after parsing so that preserving quotes is the default
    opt_quo = ! opt_quo;

    const char *file;
    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");
    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        // inp keeps the untouched line; buf is rewritten in place (dst
        // never gets ahead of src because a character is only written after
        // at least one has been read)
        strcpy(buf,inp);

        src = buf;
        dst = buf;

        int quo = 0;        // 1=inside a quoted string
        int white = 0;      // 1=a space is pending
        int dot = 1;        // 1=at the start of a sentence
        int ctr_sent = 0;
        int ctr_word = 0;
        int ctr_para = 1;

        for (int chr = *src++;  chr != 0;  chr = *src++) {
            dbgprt("LOOP %2.2X/%s quo=%d white=%d dot=%d word=%d sent=%d para=%d\n",
                chr,showchr(chr),quo,white,dot,
                ctr_word,ctr_sent,ctr_para);

            // got a quote
            if (chr == '"') {
                if (! quo)
                    WHITEOUT;
                if (opt_quo)
                    COPY;
                quo = ! quo;
                continue;
            }

            // non-quote
            else {
                // inside quotes everything is copied verbatim
                if (quo) {
                    COPY;
                    continue;
                }
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                COPY;

                // a space must follow the dot before the next word
                white = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                white = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                white = 1;
                continue;
            }

            // output accumulated whitespace
            WHITEOUT;

            // the <ctype.h> functions are only defined for values
            // representable as unsigned char (or EOF); chr is negative for
            // bytes >= 0x80 where char is signed
            const unsigned char uch = (unsigned char) chr;

            // got uppercase -- convert to lowercase if we're not at the start
            // of a sentence
            if (isupper(uch)) {
                if (! dot)
                    chr = tolower(uch);
            }

            // got lowercase -- capitalize if we're just starting a sentence
            else {
                if (islower(uch)) {
                    if (dot)
                        chr = toupper(uch);
                }
            }

            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;

            dot = 0;
        }

        *dst = 0;

        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}

以下是程序输出：

inp: 'the LANGUAGE  "C" is a procedural              programming language     .It was initially developed by "Dennis Ritchie"..            the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style                .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31

更新：

太棒了，谢谢你！我已经参考你的做法，并在我的代码上实现了它。不过，教授限制我们只能使用stdio.h和string.h这两个库，所以我不能使用其他库中的任何函数。-尼台丹

不客气！

我很高兴你能把我的代码融入你的。这是所有可能的学习方案中最好的。

我不确定我是否添加了足够的注释来澄清我的算法，所以我对它进行了一些清理，并在争论是否要发布它。这是张贴在下面。

基于您刚才所说的，我仍然不确定是否允许创建您自己的函数。正如我说过的，如果调试函数不改变算法，那么调试函数可能是可以的。

总之，下面是更新的代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

int opt_d;                          // debug: toggled by -d
int opt_quo;                        // preserve quote: when non-zero, '"' characters are copied to the output
int opt_x;                          // show space as hex: passed to _showchr() as xflg by showchr()

// All variadic macros below use the standard C99 __VA_ARGS__ form instead
// of the GNU-only "name..." syntax.

// _dbgprt(fmt, ...) -- unconditional printf-style trace output
#define _dbgprt(...) \
    printf(__VA_ARGS__)

// dbgprt(fmt, ...) -- trace output, compiled in only when DEBUG is non-zero
// and printed only when opt_d is set
#if DEBUG
#define dbgprt(...) \
    do { \
        if (opt_d) \
            _dbgprt(__VA_ARGS__); \
    } while (0)
#else
#define dbgprt(...)             do { } while (0)
#endif

// COPYX(c) -- append character c at the output cursor "dst" and advance it.
// The argument is evaluated exactly once. The DEBUG variant additionally
// sets "copy" and reports the character when it differs from the input
// character just consumed (src[-1]); it expects "copy" and "src" in scope.
#if DEBUG
#define COPYX(chr_) \
    do { \
        const int copyx_chr_ = (chr_); \
        *dst++ = (char) copyx_chr_; \
        copy = 1; \
        if (copyx_chr_ != src[-1]) \
            _dbgprt(" COPY %s\n",_showchr(copyx_chr_,1)); \
    } while (0)
#else
#define COPYX(chr_) \
    do { \
        const int copyx_chr_ = (chr_); \
        *dst++ = (char) copyx_chr_; \
    } while (0)
#endif

// COPY -- append the current input character; expects "chr" in scope
#define COPY \
    COPYX(chr)

// output accumulated white space and increment count of number of words
// (expects "spc" and "ctr_word" in scope; the break leaves the do/while(0)
// wrapper early when no space is pending)
#define SPACEOUT \
    do { \
        if (! spc) \
            break; \
        spc = 0; \
        COPYX(' '); \
        ctr_word += 1; \
    } while (0)

/*
 * _showchr -- render one character for diagnostic output.
 *
 * Printable ASCII is returned as the character itself; anything else is
 * returned as two hex digits in braces, e.g. "{0A}". With xflg non-zero the
 * space character is shown in hex as well ("{20}").
 *
 * chr is reduced to a single byte first. Plain char values arrive
 * sign-extended (negative) for bytes >= 0x80 where char is signed;
 * formatting such a value directly would yield "{FFFFFF80}" and overrun
 * the 10-byte buffer.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.
 */
const char *
_showchr(int chr,int xflg)
{
    static char buf[10];
    unsigned int lo = xflg ? 0x21 : 0x20;
    unsigned int byte = (unsigned char) chr;

    if ((byte >= lo) && (byte <= 0x7E))
        snprintf(buf,sizeof(buf),"%c",(int) byte);
    else
        snprintf(buf,sizeof(buf),"{%2.2X}",byte);

    return buf;
}

// showchr -- format chr through _showchr(), passing the global opt_x as
// the xflg argument; returns whatever _showchr() returns.
const char *
showchr(int chr)
{

    return _showchr(chr,opt_x);
}

/*
 * showbuf -- print the label who, then the contents of buf between single
 * quotes with each character rendered by showchr(), then a newline.
 */
void
showbuf(const char *buf,const char *who)
{
    const char *delim = "'";

    printf("%s: %s",who,delim);

    // walk the string up to (not including) the terminator
    for (const char *p = buf;  *p != 0;  ++p) {
        int chr = *p;

        printf("%s",showchr(chr));
    }

    printf("%s\n",delim);
}

/*
 * dbgint -- print one item of the debug trace line.
 *
 * sym: label to print. A label starting with an uppercase letter denotes a
 *      counter and is always printed, followed by val. Any other label
 *      denotes a flag and is printed (without a value) only when val is
 *      non-zero.
 * val: current value of the counter or flag.
 */
void
dbgint(const char *sym,int val)
{
    // cast: isupper() is undefined for negative values, which a plain char
    // can hold where char is signed
    int ctr = isupper((unsigned char) sym[0]);

    // a flag that is currently clear prints nothing
    if (! ctr && ! val)
        return;

    _dbgprt(" %s",sym);

    if (ctr)
        _dbgprt("%d",val);
}

/*
 * main -- clean up every line of a text file and print statistics for it.
 *
 * Usage: prog [-d] [-q] [-x[N]] [file]
 *   -d     toggle opt_d (per-character trace, DEBUG builds only)
 *   -q     toggle opt_quo; quote characters are kept unless -q is given
 *   -x[N]  set opt_x to N, or to 1 when no number follows
 *   file   input file; "inp.txt" when omitted
 *
 * Each line is rewritten in place in buf by a single pass driven by three
 * state flags (quo, spc, dot). For every line the original and cleaned
 * text are shown with showbuf(), followed by a TOTAL line with the cleaned
 * length and the sentence, paragraph and word counts.
 *
 * Returns 0; exits with status 1 if the input file cannot be opened.
 */
int
main(int argc,char **argv)
{
    char inp[1000];
    char buf[1000];
    char *src;
    char *dst;

    // skip the program name
    --argc;
    ++argv;

    // leading arguments that start with '-' are options; the option letter
    // is the character right after the dash and cp points just past it
    for (;  argc > 0;  --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        case 'x':
            opt_x = (*cp != 0) ? atoi(cp) : 1;
            break;
        default:
            // unknown option letters are ignored
            break;
        }
    }

    // inverted after parsing so that preserving quotes is the default
    opt_quo = ! opt_quo;

    const char *file;
    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");
    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        // inp keeps the untouched line; buf is rewritten in place
        strcpy(buf,inp);

        src = buf;
        dst = buf;

        // state variables
        int quo = 0;  // 1=within quoted string
        int spc = 0;  // 1=space seen
        int dot = 1;  // 1=period/newline seen

        // counters
        int ctr_sent = 0;  // number of sentences
        int ctr_word = 0;  // number of words
        int ctr_para = 1;  // number of paragraphs

#if DEBUG
        int copy = 0;  // 1=the previous input char produced output
        int ochr = 0;  // previous input char
#endif

        for (int chr = *src++;  chr != 0;  chr = *src++) {
#if DEBUG
            if (opt_d) {
                // show if we skipped the prior char (and it was _not_ a space)
                if ((! copy) && (ochr != ' '))
                    _dbgprt("SKIP\n");
                copy = 0;
                ochr = chr;

                _dbgprt("LOOP %s",showchr(chr));
                dbgint("W:",ctr_word);
                dbgint("S:",ctr_sent);
                dbgint("P:",ctr_para);
                dbgint("quo",quo);
                dbgint("spc",spc);
                dbgint("dot",dot);
                _dbgprt("\n");
            }
#endif

            // got a quote
            if (chr == '"') {
                // flush whitespace if starting a quoted string
                if (! quo)
                    SPACEOUT;

                // copy the quote
                if (opt_quo)
                    COPY;

                // flip the quote mode
                quo = ! quo;
                continue;
            }

            // non-quote
            else {
                // if inside a quoted string, just copy out the char verbatim
                if (quo) {
                    COPY;
                    continue;
                }
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                COPY;

                // force whitespace mode (ensure space after dot)
                // (e.g.) change:
                //   i go.he goes.
                // into:
                //   i go. he goes.
                spc = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                spc = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                spc = 1;
                continue;
            }

            // output accumulated whitespace
            SPACEOUT;

            // convert case
            // The <ctype.h> functions are only defined for values
            // representable as unsigned char (or EOF), and chr is negative
            // for bytes >= 0x80 where char is signed. The result goes back
            // through char so that chr keeps the same signedness as the
            // src[-1] value the DEBUG COPYX compares it with.
            const unsigned char uch = (unsigned char) chr;
            if (dot)
                chr = (char) toupper(uch);
            else
                chr = (char) tolower(uch);

            // output the current character -- it's _not_ special
            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;

            // we're no longer at the start of a sentence
            dot = 0;
        }

        *dst = 0;

        // an opt_x of exactly 1 is cleared before the buffers are shown
        // (and stays cleared for all following lines)
        if (opt_x == 1)
            opt_x = 0;
        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}

用-d输出

SKIP
LOOP t W:0 S:0 P:1 dot
LOOP h W:0 S:1 P:1
LOOP e W:0 S:1 P:1
LOOP   W:0 S:1 P:1
LOOP L W:0 S:1 P:1 spc
 COPY {20}
LOOP A W:1 S:1 P:1
LOOP N W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP U W:1 S:1 P:1
LOOP A W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP E W:1 S:1 P:1
LOOP   W:1 S:1 P:1
LOOP   W:1 S:1 P:1 spc
LOOP " W:1 S:1 P:1 spc
 COPY {20}
LOOP C W:2 S:1 P:1 quo
LOOP " W:2 S:1 P:1 quo
LOOP   W:2 S:1 P:1
LOOP i W:2 S:1 P:1 spc
 COPY {20}
LOOP s W:3 S:1 P:1
LOOP   W:3 S:1 P:1
LOOP a W:3 S:1 P:1 spc
 COPY {20}
LOOP   W:4 S:1 P:1
LOOP p W:4 S:1 P:1 spc
 COPY {20}
LOOP r W:5 S:1 P:1
LOOP o W:5 S:1 P:1
LOOP c W:5 S:1 P:1
LOOP e W:5 S:1 P:1
LOOP d W:5 S:1 P:1
LOOP u W:5 S:1 P:1
LOOP r W:5 S:1 P:1
LOOP a W:5 S:1 P:1
LOOP l W:5 S:1 P:1
LOOP   W:5 S:1 P:1
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP   W:5 S:1 P:1 spc
LOOP p W:5 S:1 P:1 spc
 COPY {20}
LOOP r W:6 S:1 P:1
LOOP o W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP r W:6 S:1 P:1
LOOP a W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP i W:6 S:1 P:1
LOOP n W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP   W:6 S:1 P:1
LOOP l W:6 S:1 P:1 spc
 COPY {20}
LOOP a W:7 S:1 P:1
LOOP n W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP u W:7 S:1 P:1
LOOP a W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP e W:7 S:1 P:1
LOOP   W:7 S:1 P:1
LOOP   W:7 S:1 P:1 spc
LOOP   W:7 S:1 P:1 spc
LOOP   W:7 S:1 P:1 spc
LOOP   W:7 S:1 P:1 spc
LOOP . W:7 S:1 P:1 spc
LOOP I W:7 S:1 P:1 spc dot
 COPY {20}
LOOP t W:8 S:2 P:1
LOOP   W:8 S:2 P:1
LOOP w W:8 S:2 P:1 spc
 COPY {20}
LOOP a W:9 S:2 P:1
LOOP s W:9 S:2 P:1
LOOP   W:9 S:2 P:1
LOOP i W:9 S:2 P:1 spc
 COPY {20}
LOOP n W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP t W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP a W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP y W:10 S:2 P:1
LOOP   W:10 S:2 P:1
LOOP d W:10 S:2 P:1 spc
 COPY {20}
LOOP e W:11 S:2 P:1
LOOP v W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP l W:11 S:2 P:1
LOOP o W:11 S:2 P:1
LOOP p W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP d W:11 S:2 P:1
LOOP   W:11 S:2 P:1
LOOP b W:11 S:2 P:1 spc
 COPY {20}
LOOP y W:12 S:2 P:1
LOOP   W:12 S:2 P:1
LOOP " W:12 S:2 P:1 spc
 COPY {20}
LOOP D W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP s W:13 S:2 P:1 quo
LOOP   W:13 S:2 P:1 quo
LOOP R W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP t W:13 S:2 P:1 quo
LOOP c W:13 S:2 P:1 quo
LOOP h W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP " W:13 S:2 P:1 quo
LOOP . W:13 S:2 P:1
 COPY {0A}
LOOP   W:13 S:2 P:2 dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP   W:13 S:2 P:2 spc dot
LOOP t W:13 S:2 P:2 spc dot
 COPY {20}
 COPY T
LOOP h W:14 S:3 P:2
LOOP e W:14 S:3 P:2
LOOP   W:14 S:3 P:2
LOOP M W:14 S:3 P:2 spc
 COPY {20}
 COPY m
LOOP a W:15 S:3 P:2
LOOP i W:15 S:3 P:2
LOOP n W:15 S:3 P:2
LOOP   W:15 S:3 P:2
LOOP f W:15 S:3 P:2 spc
 COPY {20}
LOOP e W:16 S:3 P:2
LOOP A W:16 S:3 P:2
 COPY a
LOOP t W:16 S:3 P:2
LOOP u W:16 S:3 P:2
LOOP r W:16 S:3 P:2
LOOP e W:16 S:3 P:2
LOOP s W:16 S:3 P:2
LOOP   W:16 S:3 P:2
LOOP o W:16 S:3 P:2 spc
 COPY {20}
LOOP f W:17 S:3 P:2
LOOP   W:17 S:3 P:2
LOOP " W:17 S:3 P:2 spc
 COPY {20}
LOOP C W:18 S:3 P:2 quo
LOOP " W:18 S:3 P:2 quo
LOOP   W:18 S:3 P:2
LOOP l W:18 S:3 P:2 spc
 COPY {20}
LOOP a W:19 S:3 P:2
LOOP n W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP u W:19 S:3 P:2
LOOP a W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP e W:19 S:3 P:2
LOOP   W:19 S:3 P:2
LOOP i W:19 S:3 P:2 spc
 COPY {20}
LOOP n W:20 S:3 P:2
LOOP c W:20 S:3 P:2
LOOP l W:20 S:3 P:2
LOOP u W:20 S:3 P:2
LOOP d W:20 S:3 P:2
LOOP e W:20 S:3 P:2
LOOP   W:20 S:3 P:2
LOOP l W:20 S:3 P:2 spc
 COPY {20}
LOOP o W:21 S:3 P:2
LOOP w W:21 S:3 P:2
LOOP - W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP v W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP   W:21 S:3 P:2
LOOP a W:21 S:3 P:2 spc
 COPY {20}
LOOP c W:22 S:3 P:2
LOOP c W:22 S:3 P:2
LOOP e W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP   W:22 S:3 P:2
LOOP t W:22 S:3 P:2 spc
 COPY {20}
LOOP o W:23 S:3 P:2
LOOP   W:23 S:3 P:2
LOOP m W:23 S:3 P:2 spc
 COPY {20}
LOOP e W:24 S:3 P:2
LOOP m W:24 S:3 P:2
LOOP o W:24 S:3 P:2
LOOP r W:24 S:3 P:2
LOOP y W:24 S:3 P:2
LOOP , W:24 S:3 P:2
LOOP   W:24 S:3 P:2
LOOP s W:24 S:3 P:2 spc
 COPY {20}
LOOP i W:25 S:3 P:2
LOOP m W:25 S:3 P:2
LOOP p W:25 S:3 P:2
LOOP l W:25 S:3 P:2
LOOP e W:25 S:3 P:2
LOOP   W:25 S:3 P:2
LOOP s W:25 S:3 P:2 spc
 COPY {20}
LOOP e W:26 S:3 P:2
LOOP t W:26 S:3 P:2
LOOP   W:26 S:3 P:2
LOOP o W:26 S:3 P:2 spc
 COPY {20}
LOOP f W:27 S:3 P:2
LOOP   W:27 S:3 P:2
LOOP k W:27 S:3 P:2 spc
 COPY {20}
LOOP e W:28 S:3 P:2
LOOP y W:28 S:3 P:2
LOOP w W:28 S:3 P:2
LOOP o W:28 S:3 P:2
LOOP r W:28 S:3 P:2
LOOP d W:28 S:3 P:2
LOOP s W:28 S:3 P:2
LOOP , W:28 S:3 P:2
LOOP   W:28 S:3 P:2
LOOP a W:28 S:3 P:2 spc
 COPY {20}
LOOP n W:29 S:3 P:2
LOOP d W:29 S:3 P:2
LOOP   W:29 S:3 P:2
LOOP c W:29 S:3 P:2 spc
 COPY {20}
LOOP l W:30 S:3 P:2
LOOP e W:30 S:3 P:2
LOOP a W:30 S:3 P:2
LOOP n W:30 S:3 P:2
LOOP   W:30 S:3 P:2
LOOP s W:30 S:3 P:2 spc
 COPY {20}
LOOP t W:31 S:3 P:2
LOOP y W:31 S:3 P:2
LOOP l W:31 S:3 P:2
LOOP e W:31 S:3 P:2
LOOP   W:31 S:3 P:2
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP   W:31 S:3 P:2 spc
LOOP . W:31 S:3 P:2 spc
LOOP {0A} W:31 S:3 P:2 spc dot
inp: 'the LANGUAGE  "C" is a procedural              programming language     .It was initially developed by "Dennis Ritchie"..            the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style                .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31

票数 1

Stack Overflow用户

发布于 2022-05-11 18:59:31

这里有一个可能的解决办法。

希望这将表明你不需要所有的重复。

我只用给出的例子对它进行了测试，很可能仍然有边缘情况下它可能会崩溃。您可能希望分配buffer，而不是使用特定的值，但是您可能需要检查是否有可能导致扩展的输入。

关于正确位置的大写，除了一行以外，原文中没有任何范围，因此没有段落的概念。因此，我在句子的开头就选择了大写字母。

NB: OP没有指定正确的输出是什么，因为文章的标题是“错误的字符计数”，所以这是基于需求和OP代码的一些洞察力的最佳猜测(正如所述，这可能没有产生正确的结果)。

我不认为这里的重点是修复OP的bug，而是说明接近或实现解决方案的变通方法。

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Sample input text that main() formats (the example line from the question).
char original[] = "the LANGUAGE  \"C\" is a procedural              programming language     .It was initially developed by \"Dennis Ritchie\"..            the Main feAtures of \"C\" language include low-level access to memory, simple set of keywords, and clean style                .";

/*
 * main -- format the sample text in `original` and print the original and
 * cleaned text with character, word, sentence and paragraph counts.
 *
 * The text is processed in one pass. Commas, periods and spaces are not
 * written when they are read; they only set a pending flag, and the
 * pending punctuation is flushed in normalized form just before the next
 * ordinary character is written. Text between double quotes is copied
 * unchanged.
 *
 * The input is read from `original` and written to a separate buffer. An
 * in-place rewrite is not safe here, because a comma expands to ", " and
 * the write position could overtake the read position.
 *
 * Always returns 0.
 */
int main(void) {

    // No input character produces more than two output characters (a comma
    // becomes ", "), and one newline plus the terminator are appended at
    // the end, so twice the input size plus two is always sufficient.
    char buffer[2 * sizeof original + 2];
    const char *src = original;
    char *dest = buffer;

    bool quoted = false;        // inside a "..." section
    bool sentence = false;      // a single period is pending
    int periods = 0;            // consecutive periods seen (0 or 1 pending)
    bool space = false;         // at least one space is pending
    bool paragraph = true;      // at the start of the text or after ".."
    bool comma = false;         // a comma is pending
    int letters = 0;            // lowercase letters seen in the current word
    int words = 0;
    int sentences = 0;
    int paragraphs = 0;

    for ( ; *src ; src++) {

        // The letter test is done on the input character in both branches.
        // NOTE(review): only 'a'..'z' is tested, so uppercase letters do not
        // count towards a word. This is kept as posted so that the word
        // count published with this program stays reproducible.
        const bool is_letter = (*src >= 'a' && *src <= 'z');

        if (quoted) {

            if (*src == '"') {
                quoted = false;
            }

            if (is_letter) {
                letters++;
            } else {
                if (letters) {
                    words++;
                    letters = 0;
                }
            }

            // quoted text is copied verbatim
            *dest++ = *src;

        } else {

            switch (*src) {

            case '"':
                quoted = true;
                break;

            case ',':
                comma = true;
                continue;

            case ' ':
                if (letters) {
                    words++;
                    letters = 0;
                }
                space = true;
                continue;

            case '.':
                // a second consecutive period ends the paragraph
                if (++periods == 2) {
                    *dest++ = '.';
                    *dest++ = '\n';
                    periods = 0;
                    paragraph = true;
                } else {
                    sentence = true;
                }
                continue;

            default:
                break;
            }

            // flush pending punctuation in normalized form
            if (comma) {
                *dest++ = ',';
                *dest++ = ' ';
                comma = space = false;
            }

            if (periods) {
                *dest++ = '.';
                periods = 0;
            }

            // no leading space at the start of a paragraph
            if (space) {
                if (!paragraph) {
                    *dest++ = ' ';
                }
                space = false;
            }

            if (is_letter) {
                letters++;
            } else {
                if (letters) {
                    words++;
                    letters = 0;
                }
            }

            // cast: toupper()/tolower() are undefined for negative values,
            // which a plain char can hold where char is signed
            *dest++ = (char) (sentence || paragraph
                ? toupper((unsigned char) *src)
                : tolower((unsigned char) *src));

            if (sentence || paragraph) {
                if (letters) {
                    words++;
                }
                letters = 0;
            }

            if (sentence) {
                sentences++;
            }

            if (paragraph) {
                paragraphs++;
            }

            sentence = paragraph = false;
        }

    }

    // account for punctuation still pending at the end of the input
    if (sentence) {
        sentences++;
    }

    if (paragraph) {
        paragraphs++;
    }

    if (periods) {
        *dest++ = '.';
    }

    *dest++ = '\n';

    *dest = '\0';

    printf("\nInput Chars=%d\n\n\"%s\"\n", (int)strlen(original), original);
    printf("\nOutput Chars=%d, Words=%d, Sentences=%d, Paragraphs=%d\n\n\"%s\"\n", (int)strlen(buffer), words, sentences, paragraphs, buffer);

    return 0;
}

这就产生了：

Input Chars=259

"the LANGUAGE  "C" is a procedural              programming language     .It was initially developed by "Dennis Ritchie"..            the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style                ."

Output Chars=214, Words=34, Sentences=3, Paragraphs=2

"The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie".
The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.
"

票数 1

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/72203448

复制

相似问题

问C文本格式化程序字符计数已关闭
EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问C文本格式化程序字符计数已关闭EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问C文本格式化程序字符计数已关闭
EN