По случайному совпадению у меня уже было несколько функций, которые по существу делают то, что вы пытаетесь сделать. Возможно, решение не идеально, но приведённый ниже код читает 2 файла, загружает строки каждого из них в массив указателей на char, затем разбивает каждую строку на токены и сравнивает соответствующие токены, чтобы определить, различается ли их написание, и выводит те слова каждой строки, которые написаны по-разному.
Это может подсказать вам несколько дополнительных идей о том, как подойти к вашей задаче. Примечание: это лишь пример, он не претендует на полную протестированность для всех граничных случаев и т. д. Поскольку вы выделяли память динамически, имеет смысл продолжить этот подход и при токенизации каждой строки. Функция, которая полностью разбивает строку на токены и возвращает слова в массиве указателей на char, существенно сокращает количество и разнообразие вложенных циклов. Посмотрите — возможно, пригодится. Также обратите внимание, что функция prn_chararray
не используется в коде ниже, но оставлена для удобства:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* NMAX: number of line pointers readtxtfile allocates up front */
#define NMAX 256
/* BUFL: size in bytes of a fixed temporary word buffer */
#define BUFL 64
/* MAXS: number of word pointers splitstr allocates up front */
#define MAXS 32
/* read the lines of file 'fn' into an array of strings; the number of
   lines stored is reported through 'idx' */
char **readtxtfile (char *fn, size_t *idx);
/* split string 's' into an array of words; the word count is written
   to 'n' */
char **splitstr (char *s, size_t *n);
/* double the size of pointer array 'p' (currently '*n' elements),
   zeroing the added half and updating '*n' */
char **realloc_char (char **p, size_t *n);
/* print every string in 'ca' up to the first NULL entry */
void prn_chararray (char **ca);
/* free every string in 'ca' up to the first NULL entry, then 'ca' itself */
void free_chararray (char **ca);
/* Compare two text files line by line and word by word.
 *
 * usage: prog <filename1> <filename2>
 *
 * Both files are read into arrays of lines. For every line index present
 * in both files, the two lines are split into words and each pair of
 * words at the same position is compared; pairs that differ are printed.
 * Extra lines in the longer file and extra words in the longer line are
 * not compared.
 *
 * Returns 0 on success, 1 on a usage error or when either file could not
 * be read.
 */
int main (int argc, char **argv) {
    if (argc < 3) {
        fprintf (stderr, "error: insufficient input, usage: %s <filename1> <filename2>\n", argv[0]);
        return 1;
    }

    size_t file1_size = 0;      /* number of lines read from file 1 */
    size_t file2_size = 0;      /* number of lines read from file 2 */

    /* read each file into an array of strings,
       number of lines read, returned in file_size */
    char **file1 = readtxtfile (argv[1], &file1_size);
    char **file2 = readtxtfile (argv[2], &file2_size);

    /* readtxtfile has already reported the reason; release whichever
       file was read and signal the failure through the exit status */
    if (!file1 || !file2) {
        free_chararray (file1);
        free_chararray (file2);
        return 1;
    }

    /* only line indexes that exist in both files can be compared */
    size_t linemin = file1_size < file2_size ? file1_size : file2_size;

    for (size_t i = 0; i < linemin; i++) {
        size_t nwords1 = 0;     /* number of words in the file1 line */
        size_t nwords2 = 0;     /* number of words in the file2 line */

        printf ("\n file1[%2zu] : %s\n file2[%2zu] : %s\n\n", i, file1[i], i, file2[i]);

        char **f1words = splitstr (file1[i], &nwords1);
        char **f2words = splitstr (file2[i], &nwords2);

        if (!f1words || !f2words) {
            fprintf (stderr, "error: word splitting falure.\n");
            /* one of the two may have succeeded: free it before moving
               on (free_chararray accepts NULL) so it is not leaked */
            free_chararray (f1words);
            free_chararray (f2words);
            continue;
        }

        /* only word positions that exist in both lines can be compared */
        size_t wordmin = nwords1 < nwords2 ? nwords1 : nwords2;

        for (size_t j = 0; j < wordmin; j++) {
            if (strcmp (f1words[j], f2words[j]) != 0)
                printf (" %16s != %s\n", f1words[j], f2words[j]);
        }

        free_chararray (f1words);
        free_chararray (f2words);
    }

    free_chararray (file1);
    free_chararray (file2);

    return 0;
}
/* Read every line of the text file 'fn' into a dynamically allocated,
 * NULL-terminated array of strings.
 *
 *   fn   path of the file to read
 *   idx  out: set to the number of lines stored in the returned array
 *
 * Trailing '\n' and '\r' characters are stripped from each line.
 * Returns the array, or NULL when 'fn' or 'idx' is NULL, the file cannot
 * be opened, or the initial allocation fails. If copying a line fails,
 * reading stops and the lines stored so far are returned. The caller owns
 * the result and releases it with free_chararray.
 */
char** readtxtfile (char *fn, size_t *idx)
{
    if (!fn || !idx) return NULL;   /* validate arguments */

    char *ln = NULL;        /* NULL forces getline to allocate */
    size_t n = 0;           /* getline buffer size (0 - getline decides) */
    ssize_t nchr = 0;       /* number of chars actually read */
    size_t nmax = NMAX;     /* current capacity of array */
    char **array = NULL;    /* array to hold lines read */
    FILE *fp = NULL;        /* file pointer to open file fn */

    *idx = 0;               /* count starts at 0 whatever the caller passed */

    /* open/validate file */
    if (!(fp = fopen (fn, "r"))) {
        fprintf (stderr, "%s() error: file open failed '%s'.\n", __func__, fn);
        return NULL;
    }

    /* allocate NMAX pointers to char* */
    if (!(array = calloc (NMAX, sizeof *array))) {
        fprintf (stderr, "%s() error: memory allocation failed.\n", __func__);
        fclose (fp);        /* do not leak the open file on this path */
        return NULL;
    }

    /* read each line from the file - buffer allocated by getline */
    while ((nchr = getline (&ln, &n, fp)) != -1) {
        /* strip newline or carriage rtn */
        while (nchr > 0 && (ln[nchr-1] == '\n' || ln[nchr-1] == '\r'))
            ln[--nchr] = 0;

        char *copy = strdup (ln);   /* allocate/copy ln */
        if (!copy) {
            /* never store or count a NULL entry: it would end the array
               early while *idx still claims the line exists */
            fprintf (stderr, "%s() error: memory allocation failed.\n", __func__);
            break;
        }
        array[(*idx)++] = copy;

        /* grow as soon as the array is full so that a zeroed slot always
           follows the last line (the NULL terminator) */
        if (*idx == nmax)
            array = realloc_char (array, &nmax);
    }

    free (ln);              /* free memory allocated by getline */
    fclose (fp);            /* close the input file */

    return array;
}
/* Split string 's' into separate words.
 *
 * A word is a maximal run of printable, non-space ASCII characters
 * ('!' through '~'); every other byte (space, control and format
 * characters, bytes outside that range) acts as a separator.
 *
 *   s  string to split (not modified)
 *   n  out: number of words stored in the returned array
 *
 * Returns a NULL-terminated array of newly allocated word copies, or
 * NULL when 's' is NULL or empty, 'n' is NULL, or an allocation fails.
 * A string holding only separators yields an empty (non-NULL) array with
 * *n == 0. The caller owns the result and releases it with
 * free_chararray.
 */
char **splitstr (char *s, size_t *n)
{
    if (!s || !*s || !n) return NULL;

    size_t maxs = MAXS;     /* current capacity of the word array */
    *n = 0;                 /* number of words stored so far */

    /* allocate and validate array of pointer to char */
    char **a = calloc (MAXS, sizeof *a);
    if (!a) {
        fprintf (stderr, "%s() error: memory allocation failed.\n", __func__);
        return NULL;
    }

    for (char *p = s; *p; ) {
        /* skip each non-print/format char */
        while (*p && (*p <= ' ' || *p > '~'))
            p++;
        if (!*p) break;     /* only separators were left */

        /* measure the word in place instead of copying it through a
           fixed-size buffer, so words of any length are handled */
        char *start = p;
        while (*p > ' ' && *p <= '~')
            p++;
        size_t len = (size_t)(p - start);

        char *word = malloc (len + 1);
        if (!word) {
            fprintf (stderr, "%s() error: memory allocation failed.\n", __func__);
            free_chararray (a);     /* release the words collected so far */
            *n = 0;
            return NULL;
        }
        memcpy (word, start, len);
        word[len] = 0;              /* null-terminate the copy */

        a[(*n)++] = word;

        /* grow as soon as the array is full so that a zeroed slot always
           follows the last word (the NULL terminator) */
        if (*n == maxs)
            a = realloc_char (a, &maxs);
    }

    return a;
}
/* Print an array of character pointers.
 *
 *   ca  NULL-terminated array of strings; a NULL array prints nothing
 *
 * Each string is written to stdout on its own line, prefixed with its
 * index in the array.
 */
void prn_chararray (char **ca)
{
    if (!ca) return;    /* same NULL tolerance as free_chararray */

    for (size_t n = 0; ca[n]; n++)
        printf (" arr[%3zu] %s\n", n, ca[n]);
}
/* Release a NULL-terminated array of strings: every string up to the
 * first NULL entry is freed, then the array itself. A NULL array is
 * ignored.
 */
void free_chararray (char **ca)
{
    char **cursor;

    if (ca == NULL)
        return;

    for (cursor = ca; *cursor != NULL; cursor++)
        free (*cursor);

    free (ca);
}
/* Double the size of an array of pointers to strings.
 *
 *   p  array to grow (currently '*n' elements)
 *   n  in: current element count; out: new element count
 *
 * The added elements are set to NULL so the array can still be iterated
 * up to its first NULL entry. A count of 0 grows the array to a single
 * element. Returns the (possibly moved) array. On allocation failure, or
 * when the doubled size would overflow size_t, an error is printed and
 * the program exits with EXIT_FAILURE, so the function never returns
 * NULL.
 */
char **realloc_char (char **p, size_t *n)
{
    size_t oldn = *n;
    size_t newn = oldn ? oldn * 2 : 1;  /* doubling 0 would request 0 bytes */

    /* refuse sizes whose byte count cannot be represented */
    if (oldn > SIZE_MAX / (2 * sizeof *p)) {
        fprintf (stderr, "%s() error: reallocation failure.\n", __func__);
        exit (EXIT_FAILURE);
    }

#ifdef DEBUG
    printf ("\n reallocating %zu to %zu (size: %zu)\n", oldn, newn, newn * sizeof *p);
#endif

    char **tmp = realloc (p, newn * sizeof *p);
    if (!tmp) {
        fprintf (stderr, "%s() error: reallocation failure.\n", __func__);
        exit (EXIT_FAILURE);
    }

    /* set the new pointers to NULL */
    for (size_t i = oldn; i < newn; i++)
        tmp[i] = NULL;

    *n = newn;
    return tmp;
}
Входные файлы
$ cat dat/words1.txt
Eye have a spelling chequer,
It came with my Pea Sea.
It plane lee marks four my revue,
Miss Steaks I can knot sea.
Eye strike the quays and type a whirred,
And weight four it two say,
Weather eye am write oar wrong,
It tells me straight aweigh.
Eye ran this poem threw it,
Your shore real glad two no.
Its vary polished in its weigh.
My chequer tolled me sew.
A chequer is a bless thing,
It freeze yew lodes of thyme.
It helps me right all stiles of righting,
And aides me when eye rime.
Each frays come posed on my screen,
Eye trussed too bee a joule.
The chequer pours over every word,
Two cheque sum spelling rule.
$ cat dat/words2.txt
I have a spelling checker,
It came with my Pin See.
It plainly skips marks for my revue,
Mistakes skip I can not see.
I strike the keys and type a word,
And wait for it to say,
Whether I am right or wrong,
It tells me straight away.
I ran this poem through it,
Your are real glad too no.
Its very polished in its way.
My checker told me so.
A checker is a blessed thing,
It frees you lots of time.
It helps me write all styles of writing,
And helps me when I rhyme.
Each pharse composed up on my screen,
I trust too bee a jewel.
The checker pours over every word,
Two check some spelling rule.
Вывод
$ ./bin/getline_cmplines dat/words1.txt dat/words2.txt
file1[ 0] : Eye have a spelling chequer,
file2[ 0] : I have a spelling checker,
Eye != I
chequer, != checker,
file1[ 1] : It came with my Pea Sea.
file2[ 1] : It came with my Pin See.
Pea != Pin
Sea. != See.
file1[ 2] : It plane lee marks four my revue,
file2[ 2] : It plainly skips marks for my revue,
plane != plainly
lee != skips
four != for
file1[ 3] : Miss Steaks I can knot sea.
file2[ 3] : Mistakes skip I can not see.
Miss != Mistakes
Steaks != skip
knot != not
sea. != see.
file1[ 4] : Eye strike the quays and type a whirred,
file2[ 4] : I strike the keys and type a word,
Eye != I
quays != keys
whirred, != word,
file1[ 5] : And weight four it two say,
file2[ 5] : And wait for it to say,
weight != wait
four != for
two != to
file1[ 6] : Weather eye am write oar wrong,
file2[ 6] : Whether I am right or wrong,
Weather != Whether
eye != I
write != right
oar != or
<snip>
Проверка утечек памяти
$ valgrind ./bin/getline_cmplines dat/words1.txt dat/words2.txt
==5670== Memcheck, a memory error detector
==5670== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==5670== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==5670== Command: ./bin/getline_cmplines dat/words1.txt dat/words2.txt
==5670==
file1[ 0] : Eye have a spelling chequer,
file2[ 0] : I have a spelling checker,
Eye != I
chequer, != checker,
<snip>
==5670==
==5670== HEAP SUMMARY:
==5670== in use at exit: 0 bytes in 0 blocks
==5670== total heap usage: 330 allocs, 330 frees, 18,138 bytes allocated
==5670==
==5670== All heap blocks were freed -- no leaks are possible
==5670==
==5670== For counts of detected and suppressed errors, rerun with: -v
==5670== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
Замечание не по теме: 'fgets' может *не* поместить символ новой строки в буфер, если буфер слишком мал, чтобы вместить всю строку. Сначала это следует проверить. –
Кроме того, нет необходимости выделять временную память: можно сравнивать, например, 'tempToken' и 'tempDictBuffer' напрямую. –
Вам нужно «перематывать» (rewind) файл словаря для каждого проверяемого слова. – BLUEPIXY