-0.05 for a skipped character in the candidate.
+1 for a match following a previous match.
+1.5 for a match at the beginning of a word.
No change for any other match.
| ... | ... |
@@ -62,6 +62,15 @@ int test_positions_2(){
|
| 62 | 62 |
return 0; |
| 63 | 63 |
} |
| 64 | 64 |
|
| 65 |
+int test_positions_3(){
|
|
| 66 |
+ size_t positions[2]; |
|
| 67 |
+ match_positions("as", "tags", positions);
|
|
| 68 |
+ assert(positions[0] == 1); |
|
| 69 |
+ assert(positions[1] == 3); |
|
| 70 |
+ |
|
| 71 |
+ return 0; |
|
| 72 |
+} |
|
| 73 |
+ |
|
| 65 | 74 |
int test_positions_exact(){
|
| 66 | 75 |
size_t positions[3]; |
| 67 | 76 |
match_positions("foo", "foo", positions);
|
| ... | ... |
@@ -84,6 +93,7 @@ int main(int argc, char *argv[]){
|
| 84 | 93 |
runtest(test_scoring); |
| 85 | 94 |
runtest(test_positions_1); |
| 86 | 95 |
runtest(test_positions_2); |
| 96 |
+ runtest(test_positions_3); |
|
| 87 | 97 |
runtest(test_positions_exact); |
| 88 | 98 |
|
| 89 | 99 |
summary(); |
| ... | ... |
@@ -29,7 +29,7 @@ void mat_print(int *mat, int n, int m){
|
| 29 | 29 |
} |
| 30 | 30 |
|
| 31 | 31 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 32 |
-typedef int score_t; |
|
| 32 |
+typedef double score_t; |
|
| 33 | 33 |
#define SCORE_MAX DBL_MAX |
| 34 | 34 |
#define SCORE_MIN -DBL_MAX |
| 35 | 35 |
|
| ... | ... |
@@ -50,7 +50,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 50 | 50 |
} |
| 51 | 51 |
|
| 52 | 52 |
int bow[m]; |
| 53 |
- int D[n][m], M[n][m]; |
|
| 53 |
+ score_t D[n][m], M[n][m]; |
|
| 54 | 54 |
bzero(D, sizeof(D)); |
| 55 | 55 |
bzero(M, sizeof(M)); |
| 56 | 56 |
|
| ... | ... |
@@ -72,39 +72,38 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 72 | 72 |
|
| 73 | 73 |
for(int i = 0; i < n; i++){
|
| 74 | 74 |
for(int j = 0; j < m; j++){
|
| 75 |
+ D[i][j] = SCORE_MIN; |
|
| 75 | 76 |
int match = tolower(needle[i]) == tolower(haystack[j]); |
| 76 | 77 |
if(match){
|
| 77 | 78 |
score_t score = 0; |
| 78 | 79 |
if(i && j) |
| 79 | 80 |
score = M[i-1][j-1]; |
| 80 | 81 |
if(bow[j]) |
| 81 |
- score += 2; |
|
| 82 |
+ score += 1.5; |
|
| 82 | 83 |
else if(i && j && D[i-1][j-1]) |
| 83 | 84 |
score = max(score, 1 + D[i-1][j-1]); |
| 84 | 85 |
M[i][j] = D[i][j] = score; |
| 85 | 86 |
} |
| 86 | 87 |
if(j) |
| 87 |
- M[i][j] = max(M[i][j], M[i][j-1]); |
|
| 88 |
+ M[i][j] = max(M[i][j], M[i][j-1] - 0.05); |
|
| 88 | 89 |
} |
| 89 | 90 |
} |
| 90 | 91 |
|
| 91 | 92 |
/* backtrace to find the positions of optimal matching */ |
| 92 | 93 |
if(positions){
|
| 93 | 94 |
for(int i = n-1, j = m-1; i >= 0; i--){
|
| 94 |
- int last = M[i][j]; |
|
| 95 |
- for(; j >= 0 && M[i][j] == last; j--){
|
|
| 95 |
+ for(; j >= 0; j--){
|
|
| 96 | 96 |
/* |
| 97 | 97 |
* There may be multiple paths which result in |
| 98 | 98 |
* the optimal weight. |
| 99 | 99 |
* |
| 100 |
- * Since we don't exit the loop on the first |
|
| 101 |
- * match, positions[i] may be assigned to |
|
| 102 |
- * multiple times. Since we are decrementing i |
|
| 103 |
- * and j, this favours the optimal path |
|
| 104 |
- * occurring earlier in the string. |
|
| 100 |
+ * For simplicity, we will pick the first one |
|
| 101 |
+ * we encounter, the latest in the candidate |
|
| 102 |
+ * string. |
|
| 105 | 103 |
*/ |
| 106 |
- if(tolower(needle[i]) == tolower(haystack[j])){
|
|
| 104 |
+ if(D[i][j] == M[i][j]){
|
|
| 107 | 105 |
positions[i] = j; |
| 106 |
+ break; |
|
| 108 | 107 |
} |
| 109 | 108 |
} |
| 110 | 109 |
} |