Browse code

Adjust scoring system

-0.05 for a skipped character in the candidate.
+1 for a match immediately following a previous match.
+1.5 for a match at the beginning of a word.
No change for any other match.

John Hawthorn authored on 27/07/2014 05:38:37
Showing 2 changed files

... ...
@@ -62,6 +62,15 @@ int test_positions_2(){
62 62
 	return 0;
63 63
 }
64 64
 
65
+int test_positions_3(){
66
+	size_t positions[2];
67
+	match_positions("as", "tags", positions);
68
+	assert(positions[0] == 1);
69
+	assert(positions[1] == 3);
70
+
71
+	return 0;
72
+}
73
+
65 74
 int test_positions_exact(){
66 75
 	size_t positions[3];
67 76
 	match_positions("foo", "foo", positions);
... ...
@@ -84,6 +93,7 @@ int main(int argc, char *argv[]){
84 93
 	runtest(test_scoring);
85 94
 	runtest(test_positions_1);
86 95
 	runtest(test_positions_2);
96
+	runtest(test_positions_3);
87 97
 	runtest(test_positions_exact);
88 98
 
89 99
 	summary();
... ...
@@ -29,7 +29,7 @@ void mat_print(int *mat, int n, int m){
29 29
 }
30 30
 
31 31
 #define max(a, b) (((a) > (b)) ? (a) : (b))
32
-typedef int score_t;
32
+typedef double score_t;
33 33
 #define SCORE_MAX DBL_MAX
34 34
 #define SCORE_MIN -DBL_MAX
35 35
 
... ...
@@ -50,7 +50,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
50 50
 	}
51 51
 
52 52
 	int bow[m];
53
-	int D[n][m], M[n][m];
53
+	score_t D[n][m], M[n][m];
54 54
 	bzero(D, sizeof(D));
55 55
 	bzero(M, sizeof(M));
56 56
 
... ...
@@ -72,39 +72,38 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
72 72
 
73 73
 	for(int i = 0; i < n; i++){
74 74
 		for(int j = 0; j < m; j++){
75
+			D[i][j] = SCORE_MIN;
75 76
 			int match = tolower(needle[i]) == tolower(haystack[j]);
76 77
 			if(match){
77 78
 				score_t score = 0;
78 79
 				if(i && j)
79 80
 					score = M[i-1][j-1];
80 81
 				if(bow[j])
81
-					score += 2;
82
+					score += 1.5;
82 83
 				else if(i && j && D[i-1][j-1])
83 84
 					score = max(score, 1 + D[i-1][j-1]);
84 85
 				M[i][j] = D[i][j] = score;
85 86
 			}
86 87
 			if(j)
87
-				M[i][j] = max(M[i][j], M[i][j-1]);
88
+				M[i][j] = max(M[i][j], M[i][j-1] - 0.05);
88 89
 		}
89 90
 	}
90 91
 
91 92
 	/* backtrace to find the positions of optimal matching */
92 93
 	if(positions){
93 94
 		for(int i = n-1, j = m-1; i >= 0; i--){
94
-			int last = M[i][j];
95
-			for(; j >= 0 && M[i][j] == last; j--){
95
+			for(; j >= 0; j--){
96 96
 				/*
97 97
 				 * There may be multiple paths which result in
98 98
 				 * the optimal weight.
99 99
 				 *
100
-				 * Since we don't exit the loop on the first
101
-				 * match, positions[i] may be assigned to
102
-				 * multiple times. Since we are decrementing i
103
-				 * and j, this favours the optimal path
104
-				 * occurring earlier in the string.
100
+				 * For simplicity, we will pick the first one
101
+				 * we encounter, the latest in the candidate
102
+				 * string.
105 103
 				 */
106
-				if(tolower(needle[i]) == tolower(haystack[j])){
104
+				if(D[i][j] == M[i][j]){
107 105
 					positions[i] = j;
106
+					break;
108 107
 				}
109 108
 			}
110 109
 		}