git.rcrnstn.net - rcrnstn/fzy

Browse code

Adjust scoring system

-0.05 for a skipped character in the candidate.
+1 for a match following a previous match
+1.5 for a match at the beginning of a word
No change for any other match.

John Hawthorn authored on 27/07/2014 05:38:37
Showing 2 changed files

fzytest.c index 130acd4..79fa39c 100644
match.c index 68e8c3e..f7c1b7d 100644

fzytest.c

History View file @ ae67b76

@@ -62,6 +62,15 @@ int test_positions_2(){
                      	return 0;
+                     }
                     +int test_positions_3(){
                     +	size_t positions[2];
                     +	match_positions("as", "tags", positions);
                     +	assert(positions[0] == 1);
                     +	assert(positions[1] == 3);
+                    +
                     +	return 0;
                     +}
+                    +
                      int test_positions_exact(){
                      	size_t positions[3];
                      	match_positions("foo", "foo", positions);
@@ -84,6 +93,7 @@ int main(int argc, char *argv[]){
                      	runtest(test_scoring);
                      	runtest(test_positions_1);
                      	runtest(test_positions_2);
                     +	runtest(test_positions_3);
                      	runtest(test_positions_exact);
                      	summary();

match.c

History View file @ ae67b76

@@ -29,7 +29,7 @@ void mat_print(int *mat, int n, int m){
+                     }
                      #define max(a, b) (((a) > (b)) ? (a) : (b))
                     -typedef int score_t;
                     +typedef double score_t;
                      #define SCORE_MAX DBL_MAX
                      #define SCORE_MIN -DBL_MAX
@@ -50,7 +50,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
+                     	}
                      	int bow[m];
                     -	int D[n][m], M[n][m];
                     +	score_t D[n][m], M[n][m];
                      	bzero(D, sizeof(D));
                      	bzero(M, sizeof(M));
@@ -72,39 +72,38 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
                      	for(int i = 0; i < n; i++){
                      		for(int j = 0; j < m; j++){
                     +			D[i][j] = SCORE_MIN;
                      			int match = tolower(needle[i]) == tolower(haystack[j]);
                      			if(match){
                      				score_t score = 0;
                      				if(i && j)
                      					score = M[i-1][j-1];
                      				if(bow[j])
                     -					score += 2;
                     +					score += 1.5;
                      				else if(i && j && D[i-1][j-1])
                      					score = max(score, 1 + D[i-1][j-1]);
                      				M[i][j] = D[i][j] = score;
+                     			}
                      			if(j)
                     -				M[i][j] = max(M[i][j], M[i][j-1]);
                     +				M[i][j] = max(M[i][j], M[i][j-1] - 0.05);
+                     		}
+                     	}
                      	/* backtrace to find the positions of optimal matching */
                      	if(positions){
                      		for(int i = n-1, j = m-1; i >= 0; i--){
                     -			int last = M[i][j];
                     -			for(; j >= 0 && M[i][j] == last; j--){
                     +			for(; j >= 0; j--){
                      				/*
                      				 * There may be multiple paths which result in
                      				 * the optimal weight.
+                     				 *
                     -				 * Since we don't exit the loop on the first
                     -				 * match, positions[i] may be assigned to
                     -				 * multiple times. Since we are decrementing i
                     -				 * and j, this favours the optimal path
                     -				 * occurring earlier in the string.
                     +				 * For simplicity, we will pick the first one
                     +				 * we encounter, the latest in the candidate
                     +				 * string.
                      				 */
                     -				if(tolower(needle[i]) == tolower(haystack[j])){
                     +				if(D[i][j] == M[i][j]){
                      					positions[i] = j;
                     +					break;
+                     				}
+                     			}
+                     		}