pad.mattdiesel.co.uk

Snippet - Closest Match to String in C

Closest Match to String in C (C)


Created 2016-05-19 22:32:34.959290 by Matt.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>

size_t match ( const char* string, uint64_t table[][128] ) ;

int main ()
{
    const char* data[] = { "copyright", "license", "version", "commands", "commandline" };
    const char* examples[] = { "copyrihgt", "liecnes", "commandline", "ver" };
    const unsigned int N = 10;
    const unsigned int M = 4;
	unsigned int i, j;
		
    // prepare a table
    unsigned long table[10][128] = { 0 };

    for ( i = 0; i < M; ++i )
        for ( j = 0; j < N; ++j )
            table[j][data[i][j]] |= 1 << (i * 4);


    for ( i = 0; i < 4; ++i ) {
        const char* q = examples[i];
        size_t result = match ( q, table );

        printf("Q(%s) -> %zd %s\n", q, result, data[result]);
    }
}

size_t match ( const char* string, uint64_t table[][128] )
{
    uint64_t count = 0;
    size_t i;

    // scan through string once, updating all counters at once
    for ( i = 0; string[i]; ++i )
        count += table[i][ (size_t) string[i] ];

    // find greatest sub-count within count
    size_t best = 0;
    size_t best_sub_count = count & 0xf;

    for ( i = 1; i < 4; ++i ) {
        size_t sub_count = ( count >>= 4 ) & 0xf;

        if ( sub_count > best_sub_count ) {
            best_sub_count = sub_count;
            best = i;
        }
    }

    return best;
}