| ... | ... |
@@ -1,5 +1,7 @@ |
| 1 | 1 |
#include <ctype.h> |
| 2 | 2 |
#include <string.h> |
| 3 |
+#include <strings.h> |
|
| 4 |
+#include <stdio.h> |
|
| 3 | 5 |
|
| 4 | 6 |
static int is_subset(const char *needle, const char *haystack){
|
| 5 | 7 |
while(*needle){
|
| ... | ... |
@@ -11,14 +13,73 @@ static int is_subset(const char *needle, const char *haystack){
|
| 11 | 13 |
return 1; |
| 12 | 14 |
} |
| 13 | 15 |
|
| 16 |
/*
 * Print one of the internal matrices to stderr (debugging aid).
 *
 * mat  points to n*m ints stored row by row; row i starts at mat[i*m].
 * n    number of rows.
 * m    number of columns.
 *
 * Every row is written on its own line, each cell right-aligned in a
 * width of 3, and the matrix is followed by two newlines.
 */
void mat_print(int *mat, int n, int m){
	int i, j;
	for(i = 0; i < n; i++){
		for(j = 0; j < m; j++){
			/* The cells are plain int, so the conversion must be %d;
			 * the 'z' length modifier is only valid for size_t-sized
			 * arguments and mismatching it is undefined behaviour. */
			fprintf(stderr, " %3d", mat[i*m + j]);
		}
		fprintf(stderr, "\n");
	}
	fprintf(stderr, "\n\n");
}
|
| 27 |
+ |
|
| 28 |
#define max(a, b) (((a) > (b)) ? (a) : (b))
typedef int score_t;

/*
 * Compute a score for how well needle matches haystack.
 *
 * Characters are compared case-insensitively. Two tables indexed by
 * [needle position][haystack position] are filled in:
 *
 *   D[][]  the best score for this position ending with a match.
 *   M[][]  the best possible score at this position.
 *
 * A match on a "beginning of word" character (an alphanumeric that follows
 * a non-alphanumeric or starts the haystack, or any uppercase letter) adds 2
 * to the best score of the preceding prefix. Any other match either carries
 * that prefix score over unchanged or, when the previous needle character
 * matched the previous haystack character, extends that run by 1.
 *
 * Returns M[n-1][m-1] / (2*n + 1), where n is the needle length, or 0.0
 * when either string is empty.
 */
double calculate_score(const char *needle, const char *haystack){
	size_t n = strlen(needle);
	size_t m = strlen(haystack);

	/*
	 * With an empty string the tables would be zero-length arrays and
	 * M[n-1][m-1] would be read out of bounds, so bail out first.
	 */
	if(n == 0 || m == 0)
		return 0.0;

	int bow[m];
	int D[n][m], M[n][m];
	memset(D, 0, sizeof(D));
	memset(M, 0, sizeof(M));

	/* Which positions are beginning of words */
	int at_bow = 1;
	for(size_t i = 0; i < m; i++){
		/* <ctype.h> functions need a value representable as unsigned char */
		unsigned char ch = (unsigned char)haystack[i];
		/* TODO: What about allcaps (ex. README) */
		bow[i] = (at_bow && isalnum(ch)) || isupper(ch);
		at_bow = !isalnum(ch);
	}

	for(size_t i = 0; i < n; i++){
		for(size_t j = 0; j < m; j++){
			int match = tolower((unsigned char)needle[i]) == tolower((unsigned char)haystack[j]);
			if(match){
				score_t score = 0;
				/* Start from the best score of the prefix before this pair */
				if(i && j)
					score = M[i-1][j-1];
				if(bow[j])
					score += 2;
				else if(i && j && D[i-1][j-1])
					/* Continue a run of consecutive matches */
					score = max(score, 1 + D[i-1][j-1]);
				M[i][j] = D[i][j] = score;
			}
			/* The best score never decreases along a row */
			if(j)
				M[i][j] = max(M[i][j], M[i][j-1]);
		}
	}

	/* Division stays in single precision so existing scores are unchanged */
	return (float)(M[n-1][m-1]) / (float)(n * 2 + 1);
}
|
| 74 |
+ |
|
| 14 | 75 |
/*
 * Rate needle against haystack.
 *
 * Returns  1.0 when needle is empty or equals haystack ignoring case,
 *         -1.0 when is_subset() returns 0 for the pair,
 *          and the result of calculate_score() otherwise.
 */
double match(const char *needle, const char *haystack){
	/* An empty query is treated as a perfect match */
	if(needle[0] == '\0')
		return 1.0;

	/* Rejected candidates get a negative score */
	if(!is_subset(needle, haystack))
		return -1.0;

	/* Case-insensitive equality is also a perfect match */
	if(strcasecmp(needle, haystack) == 0)
		return 1.0;

	return calculate_score(needle, haystack);
}
| ... | ... |
@@ -8,11 +8,11 @@ describe "score" do |
| 8 | 8 |
end |
| 9 | 9 |
|
| 10 | 10 |
# Asserts that score(candidate, query) is exactly -1.
def assert_unmatched(candidate, query)
  # Parenthesized so that `-1` cannot be parsed as a binary minus; the bare
  # form makes Ruby emit an "ambiguous first argument" warning.
  assert_equal(-1, score(candidate, query))
end
| 13 | 13 |
|
| 14 | 14 |
# Asserts that score(candidate, query) is greater than or equal to 0.
def assert_matched(candidate, query)
  result = score(candidate, query)
  assert_operator 0, :<=, result
end
| 17 | 17 |
|
| 18 | 18 |
it "scores 1 when the query is empty" do |