| 1 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,183 +0,0 @@ |
| 1 |
-#include <ctype.h> |
|
| 2 |
-#include <string.h> |
|
| 3 |
-#include <strings.h> |
|
| 4 |
-#include <stdio.h> |
|
| 5 |
-#include <float.h> |
|
| 6 |
-#include <math.h> |
|
| 7 |
- |
|
| 8 |
-#include "match.h" |
|
| 9 |
- |
|
| 10 |
-#include "config.h" |
|
| 11 |
- |
|
| 12 |
-char *strcasechr(const char *s, char c) {
|
|
| 13 |
- const char accept[3] = {c, toupper(c), 0};
|
|
| 14 |
- return strpbrk(s, accept); |
|
| 15 |
-} |
|
| 16 |
- |
|
| 17 |
-int has_match(const char *needle, const char *haystack) {
|
|
| 18 |
- while (*needle) {
|
|
| 19 |
- char nch = *needle++; |
|
| 20 |
- |
|
| 21 |
- if (!(haystack = strcasechr(haystack, nch))) {
|
|
| 22 |
- return 0; |
|
| 23 |
- } |
|
| 24 |
- haystack++; |
|
| 25 |
- } |
|
| 26 |
- return 1; |
|
| 27 |
-} |
|
| 28 |
- |
|
| 29 |
-#define max(a, b) (((a) > (b)) ? (a) : (b)) |
|
| 30 |
- |
|
| 31 |
-#ifdef DEBUG_VERBOSE |
|
| 32 |
-/* print one of the internal matrices */ |
|
| 33 |
-void mat_print(score_t *mat, char name, const char *needle, const char *haystack) {
|
|
| 34 |
- int n = strlen(needle); |
|
| 35 |
- int m = strlen(haystack); |
|
| 36 |
- int i, j; |
|
| 37 |
- fprintf(stderr, "%c ", name); |
|
| 38 |
- for (j = 0; j < m; j++) {
|
|
| 39 |
- fprintf(stderr, " %c", haystack[j]); |
|
| 40 |
- } |
|
| 41 |
- fprintf(stderr, "\n"); |
|
| 42 |
- for (i = 0; i < n; i++) {
|
|
| 43 |
- fprintf(stderr, " %c |", needle[i]); |
|
| 44 |
- for (j = 0; j < m; j++) {
|
|
| 45 |
- score_t val = mat[i * m + j]; |
|
| 46 |
- if (val == SCORE_MIN) {
|
|
| 47 |
- fprintf(stderr, " -\u221E"); |
|
| 48 |
- } else {
|
|
| 49 |
- fprintf(stderr, " %.3f", val); |
|
| 50 |
- } |
|
| 51 |
- } |
|
| 52 |
- fprintf(stderr, "\n"); |
|
| 53 |
- } |
|
| 54 |
- fprintf(stderr, "\n\n"); |
|
| 55 |
-} |
|
| 56 |
-#endif |
|
| 57 |
- |
|
| 58 |
-score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
|
|
| 59 |
- if (!*haystack || !*needle) |
|
| 60 |
- return SCORE_MIN; |
|
| 61 |
- |
|
| 62 |
- int n = strlen(needle); |
|
| 63 |
- int m = strlen(haystack); |
|
| 64 |
- |
|
| 65 |
- if (m > 1024) {
|
|
| 66 |
- /* |
|
| 67 |
- * Unreasonably large candidate: return no score |
|
| 68 |
- * If it is a valid match it will still be returned, it will |
|
| 69 |
- * just be ranked below any reasonably sized candidates |
|
| 70 |
- */ |
|
| 71 |
- return SCORE_MIN; |
|
| 72 |
- } |
|
| 73 |
- |
|
| 74 |
- score_t match_bonus[m]; |
|
| 75 |
- score_t D[n][m], M[n][m]; |
|
| 76 |
- |
|
| 77 |
- /* |
|
| 78 |
- * D[][] Stores the best score for this position ending with a match. |
|
| 79 |
- * M[][] Stores the best possible score at this position. |
|
| 80 |
- */ |
|
| 81 |
- |
|
| 82 |
- /* Which positions are beginning of words */ |
|
| 83 |
- char last_ch = '\0'; |
|
| 84 |
- for (int i = 0; i < m; i++) {
|
|
| 85 |
- char ch = haystack[i]; |
|
| 86 |
- |
|
| 87 |
- score_t score = 0; |
|
| 88 |
- if (isalnum(ch)) {
|
|
| 89 |
- if (!last_ch || last_ch == '/') {
|
|
| 90 |
- score = SCORE_MATCH_SLASH; |
|
| 91 |
- } else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' || |
|
| 92 |
- (last_ch >= '0' && last_ch <= '9')) {
|
|
| 93 |
- score = SCORE_MATCH_WORD; |
|
| 94 |
- } else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
|
|
| 95 |
- /* CamelCase */ |
|
| 96 |
- score = SCORE_MATCH_CAPITAL; |
|
| 97 |
- } else if (last_ch == '.') {
|
|
| 98 |
- score = SCORE_MATCH_DOT; |
|
| 99 |
- } |
|
| 100 |
- } |
|
| 101 |
- |
|
| 102 |
- match_bonus[i] = score; |
|
| 103 |
- last_ch = ch; |
|
| 104 |
- } |
|
| 105 |
- |
|
| 106 |
- for (int i = 0; i < n; i++) {
|
|
| 107 |
- score_t prev_score = SCORE_MIN; |
|
| 108 |
- score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; |
|
| 109 |
- for (int j = 0; j < m; j++) {
|
|
| 110 |
- score_t score = SCORE_MIN; |
|
| 111 |
- if (tolower(needle[i]) == tolower(haystack[j])) {
|
|
| 112 |
- if (!i) {
|
|
| 113 |
- score = (j * SCORE_GAP_LEADING) + match_bonus[j]; |
|
| 114 |
- } else if (j) { /* i > 0 && j > 0*/
|
|
| 115 |
- score = max( |
|
| 116 |
- M[i - 1][j - 1] + match_bonus[j], |
|
| 117 |
- |
|
| 118 |
- /* consecutive match, doesn't stack with match_bonus */ |
|
| 119 |
- D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE); |
|
| 120 |
- } |
|
| 121 |
- } |
|
| 122 |
- D[i][j] = score; |
|
| 123 |
- M[i][j] = prev_score = max(score, prev_score + gap_score); |
|
| 124 |
- } |
|
| 125 |
- } |
|
| 126 |
- |
|
| 127 |
-#ifdef DEBUG_VERBOSE |
|
| 128 |
- fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack); |
|
| 129 |
- mat_print(&D[0][0], 'D', needle, haystack); |
|
| 130 |
- mat_print(&M[0][0], 'M', needle, haystack); |
|
| 131 |
- fprintf(stderr, "\n"); |
|
| 132 |
-#endif |
|
| 133 |
- |
|
| 134 |
- /* backtrace to find the positions of optimal matching */ |
|
| 135 |
- if (positions) {
|
|
| 136 |
- int match_required = 0; |
|
| 137 |
- for (int i = n - 1, j = m - 1; i >= 0; i--) {
|
|
| 138 |
- for (; j >= 0; j--) {
|
|
| 139 |
- /* |
|
| 140 |
- * There may be multiple paths which result in |
|
| 141 |
- * the optimal weight. |
|
| 142 |
- * |
|
| 143 |
- * For simplicity, we will pick the first one |
|
| 144 |
- * we encounter, the latest in the candidate |
|
| 145 |
- * string. |
|
| 146 |
- */ |
|
| 147 |
- if (D[i][j] != SCORE_MIN && |
|
| 148 |
- (match_required || D[i][j] == M[i][j])) {
|
|
| 149 |
- /* If this score was determined using |
|
| 150 |
- * SCORE_MATCH_CONSECUTIVE, the |
|
| 151 |
- * previous character MUST be a match |
|
| 152 |
- */ |
|
| 153 |
- match_required = |
|
| 154 |
- i && j && |
|
| 155 |
- M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; |
|
| 156 |
- positions[i] = j--; |
|
| 157 |
- break; |
|
| 158 |
- } |
|
| 159 |
- } |
|
| 160 |
- } |
|
| 161 |
- } |
|
| 162 |
- |
|
| 163 |
- return M[n - 1][m - 1]; |
|
| 164 |
-} |
|
| 165 |
- |
|
| 166 |
-score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
|
|
| 167 |
- if (!*needle) {
|
|
| 168 |
- return SCORE_MAX; |
|
| 169 |
- } else if (!strcasecmp(needle, haystack)) {
|
|
| 170 |
- if (positions) {
|
|
| 171 |
- int n = strlen(needle); |
|
| 172 |
- for (int i = 0; i < n; i++) |
|
| 173 |
- positions[i] = i; |
|
| 174 |
- } |
|
| 175 |
- return SCORE_MAX; |
|
| 176 |
- } else {
|
|
| 177 |
- return calculate_score(needle, haystack, positions); |
|
| 178 |
- } |
|
| 179 |
-} |
|
| 180 |
- |
|
| 181 |
-score_t match(const char *needle, const char *haystack) {
|
|
| 182 |
- return match_positions(needle, haystack, NULL); |
|
| 183 |
-} |
| ... | ... |
@@ -30,13 +30,13 @@ int has_match(const char *needle, const char *haystack) {
|
| 30 | 30 |
|
| 31 | 31 |
#ifdef DEBUG_VERBOSE |
| 32 | 32 |
/* print one of the internal matrices */ |
| 33 |
-void mat_print(score_t *mat, const char *needle, const char *haystack) {
|
|
| 33 |
+void mat_print(score_t *mat, char name, const char *needle, const char *haystack) {
|
|
| 34 | 34 |
int n = strlen(needle); |
| 35 | 35 |
int m = strlen(haystack); |
| 36 | 36 |
int i, j; |
| 37 |
- fprintf(stderr, " "); |
|
| 37 |
+ fprintf(stderr, "%c ", name); |
|
| 38 | 38 |
for (j = 0; j < m; j++) {
|
| 39 |
- fprintf(stderr, " %c", haystack[j]); |
|
| 39 |
+ fprintf(stderr, " %c", haystack[j]); |
|
| 40 | 40 |
} |
| 41 | 41 |
fprintf(stderr, "\n"); |
| 42 | 42 |
for (i = 0; i < n; i++) {
|
| ... | ... |
@@ -44,9 +44,9 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
|
| 44 | 44 |
for (j = 0; j < m; j++) {
|
| 45 | 45 |
score_t val = mat[i * m + j]; |
| 46 | 46 |
if (val == SCORE_MIN) {
|
| 47 |
- fprintf(stderr, " -\u221E"); |
|
| 47 |
+ fprintf(stderr, " -\u221E"); |
|
| 48 | 48 |
} else {
|
| 49 |
- fprintf(stderr, " % 4g", val); |
|
| 49 |
+ fprintf(stderr, " %.3f", val); |
|
| 50 | 50 |
} |
| 51 | 51 |
} |
| 52 | 52 |
fprintf(stderr, "\n"); |
| ... | ... |
@@ -111,7 +111,7 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 111 | 111 |
if (tolower(needle[i]) == tolower(haystack[j])) {
|
| 112 | 112 |
if (!i) {
|
| 113 | 113 |
score = (j * SCORE_GAP_LEADING) + match_bonus[j]; |
| 114 |
- } else if (j) {
|
|
| 114 |
+ } else if (j) { /* i > 0 && j > 0*/
|
|
| 115 | 115 |
score = max( |
| 116 | 116 |
M[i - 1][j - 1] + match_bonus[j], |
| 117 | 117 |
|
| ... | ... |
@@ -126,8 +126,8 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 126 | 126 |
|
| 127 | 127 |
#ifdef DEBUG_VERBOSE |
| 128 | 128 |
fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack); |
| 129 |
- mat_print(&D[0][0], needle, haystack); |
|
| 130 |
- mat_print(&M[0][0], needle, haystack); |
|
| 129 |
+ mat_print(&D[0][0], 'D', needle, haystack); |
|
| 130 |
+ mat_print(&M[0][0], 'M', needle, haystack); |
|
| 131 | 131 |
fprintf(stderr, "\n"); |
| 132 | 132 |
#endif |
| 133 | 133 |
|
| ... | ... |
@@ -36,7 +36,7 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
|
| 36 | 36 |
int i, j; |
| 37 | 37 |
fprintf(stderr, " "); |
| 38 | 38 |
for (j = 0; j < m; j++) {
|
| 39 |
- fprintf(stderr, " %c", haystack[j]); |
|
| 39 |
+ fprintf(stderr, " %c", haystack[j]); |
|
| 40 | 40 |
} |
| 41 | 41 |
fprintf(stderr, "\n"); |
| 42 | 42 |
for (i = 0; i < n; i++) {
|
| ... | ... |
@@ -44,9 +44,9 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
|
| 44 | 44 |
for (j = 0; j < m; j++) {
|
| 45 | 45 |
score_t val = mat[i * m + j]; |
| 46 | 46 |
if (val == SCORE_MIN) {
|
| 47 |
- fprintf(stderr, " -inf"); |
|
| 47 |
+ fprintf(stderr, " -\u221E"); |
|
| 48 | 48 |
} else {
|
| 49 |
- fprintf(stderr, " % .2f", val); |
|
| 49 |
+ fprintf(stderr, " % 4g", val); |
|
| 50 | 50 |
} |
| 51 | 51 |
} |
| 52 | 52 |
fprintf(stderr, "\n"); |
| ... | ... |
@@ -28,6 +28,7 @@ int has_match(const char *needle, const char *haystack) {
|
| 28 | 28 |
|
| 29 | 29 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 30 | 30 |
|
| 31 |
+#ifdef DEBUG_VERBOSE |
|
| 31 | 32 |
/* print one of the internal matrices */ |
| 32 | 33 |
void mat_print(score_t *mat, const char *needle, const char *haystack) {
|
| 33 | 34 |
int n = strlen(needle); |
| ... | ... |
@@ -52,6 +53,7 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
|
| 52 | 53 |
} |
| 53 | 54 |
fprintf(stderr, "\n\n"); |
| 54 | 55 |
} |
| 56 |
+#endif |
|
| 55 | 57 |
|
| 56 | 58 |
score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
|
| 57 | 59 |
if (!*haystack || !*needle) |
| ... | ... |
@@ -122,7 +124,7 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 122 | 124 |
} |
| 123 | 125 |
} |
| 124 | 126 |
|
| 125 |
-#if 0 |
|
| 127 |
+#ifdef DEBUG_VERBOSE |
|
| 126 | 128 |
fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack); |
| 127 | 129 |
mat_print(&D[0][0], needle, haystack); |
| 128 | 130 |
mat_print(&M[0][0], needle, haystack); |
Apologies that this uses my preferred formatting style: mostly the same
as Linux, but without a break between function and brace. Adds spaces in
a few places they weren't before.
| ... | ... |
@@ -9,16 +9,16 @@ |
| 9 | 9 |
|
| 10 | 10 |
#include "config.h" |
| 11 | 11 |
|
| 12 |
-char *strcasechr(const char *s, char c){
|
|
| 12 |
+char *strcasechr(const char *s, char c) {
|
|
| 13 | 13 |
const char accept[3] = {c, toupper(c), 0};
|
| 14 | 14 |
return strpbrk(s, accept); |
| 15 | 15 |
} |
| 16 | 16 |
|
| 17 |
-int has_match(const char *needle, const char *haystack){
|
|
| 18 |
- while(*needle){
|
|
| 17 |
+int has_match(const char *needle, const char *haystack) {
|
|
| 18 |
+ while (*needle) {
|
|
| 19 | 19 |
char nch = *needle++; |
| 20 | 20 |
|
| 21 |
- if(!(haystack = strcasechr(haystack, nch))){
|
|
| 21 |
+ if (!(haystack = strcasechr(haystack, nch))) {
|
|
| 22 | 22 |
return 0; |
| 23 | 23 |
} |
| 24 | 24 |
haystack++; |
| ... | ... |
@@ -29,22 +29,22 @@ int has_match(const char *needle, const char *haystack){
|
| 29 | 29 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 30 | 30 |
|
| 31 | 31 |
/* print one of the internal matrices */ |
| 32 |
-void mat_print(score_t *mat, const char *needle, const char *haystack){
|
|
| 32 |
+void mat_print(score_t *mat, const char *needle, const char *haystack) {
|
|
| 33 | 33 |
int n = strlen(needle); |
| 34 | 34 |
int m = strlen(haystack); |
| 35 | 35 |
int i, j; |
| 36 | 36 |
fprintf(stderr, " "); |
| 37 |
- for(j = 0; j < m; j++){
|
|
| 37 |
+ for (j = 0; j < m; j++) {
|
|
| 38 | 38 |
fprintf(stderr, " %c", haystack[j]); |
| 39 | 39 |
} |
| 40 | 40 |
fprintf(stderr, "\n"); |
| 41 |
- for(i = 0; i < n; i++){
|
|
| 41 |
+ for (i = 0; i < n; i++) {
|
|
| 42 | 42 |
fprintf(stderr, " %c |", needle[i]); |
| 43 |
- for(j = 0; j < m; j++){
|
|
| 44 |
- score_t val = mat[i*m + j]; |
|
| 45 |
- if(val == SCORE_MIN){
|
|
| 43 |
+ for (j = 0; j < m; j++) {
|
|
| 44 |
+ score_t val = mat[i * m + j]; |
|
| 45 |
+ if (val == SCORE_MIN) {
|
|
| 46 | 46 |
fprintf(stderr, " -inf"); |
| 47 |
- }else{
|
|
| 47 |
+ } else {
|
|
| 48 | 48 |
fprintf(stderr, " % .2f", val); |
| 49 | 49 |
} |
| 50 | 50 |
} |
| ... | ... |
@@ -53,14 +53,14 @@ void mat_print(score_t *mat, const char *needle, const char *haystack){
|
| 53 | 53 |
fprintf(stderr, "\n\n"); |
| 54 | 54 |
} |
| 55 | 55 |
|
| 56 |
-score_t calculate_score(const char *needle, const char *haystack, size_t *positions){
|
|
| 57 |
- if(!*haystack || !*needle) |
|
| 56 |
+score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
|
|
| 57 |
+ if (!*haystack || !*needle) |
|
| 58 | 58 |
return SCORE_MIN; |
| 59 | 59 |
|
| 60 | 60 |
int n = strlen(needle); |
| 61 | 61 |
int m = strlen(haystack); |
| 62 | 62 |
|
| 63 |
- if(m > 1024){
|
|
| 63 |
+ if (m > 1024) {
|
|
| 64 | 64 |
/* |
| 65 | 65 |
* Unreasonably large candidate: return no score |
| 66 | 66 |
* If it is a valid match it will still be returned, it will |
| ... | ... |
@@ -79,23 +79,20 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 79 | 79 |
|
| 80 | 80 |
/* Which positions are beginning of words */ |
| 81 | 81 |
char last_ch = '\0'; |
| 82 |
- for(int i = 0; i < m; i++){
|
|
| 82 |
+ for (int i = 0; i < m; i++) {
|
|
| 83 | 83 |
char ch = haystack[i]; |
| 84 | 84 |
|
| 85 | 85 |
score_t score = 0; |
| 86 |
- if(isalnum(ch)){
|
|
| 87 |
- if(!last_ch || last_ch == '/'){
|
|
| 86 |
+ if (isalnum(ch)) {
|
|
| 87 |
+ if (!last_ch || last_ch == '/') {
|
|
| 88 | 88 |
score = SCORE_MATCH_SLASH; |
| 89 |
- }else if(last_ch == '-' || |
|
| 90 |
- last_ch == '_' || |
|
| 91 |
- last_ch == ' ' || |
|
| 92 |
- (last_ch >= '0' && last_ch <= '9')){
|
|
| 89 |
+ } else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' || |
|
| 90 |
+ (last_ch >= '0' && last_ch <= '9')) {
|
|
| 93 | 91 |
score = SCORE_MATCH_WORD; |
| 94 |
- }else if(last_ch >= 'a' && last_ch <= 'z' && |
|
| 95 |
- ch >= 'A' && ch <= 'Z'){
|
|
| 92 |
+ } else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
|
|
| 96 | 93 |
/* CamelCase */ |
| 97 | 94 |
score = SCORE_MATCH_CAPITAL; |
| 98 |
- }else if(last_ch == '.'){
|
|
| 95 |
+ } else if (last_ch == '.') {
|
|
| 99 | 96 |
score = SCORE_MATCH_DOT; |
| 100 | 97 |
} |
| 101 | 98 |
} |
| ... | ... |
@@ -104,21 +101,20 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 104 | 101 |
last_ch = ch; |
| 105 | 102 |
} |
| 106 | 103 |
|
| 107 |
- for(int i = 0; i < n; i++){
|
|
| 104 |
+ for (int i = 0; i < n; i++) {
|
|
| 108 | 105 |
score_t prev_score = SCORE_MIN; |
| 109 |
- score_t gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; |
|
| 110 |
- for(int j = 0; j < m; j++){
|
|
| 106 |
+ score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; |
|
| 107 |
+ for (int j = 0; j < m; j++) {
|
|
| 111 | 108 |
score_t score = SCORE_MIN; |
| 112 |
- if(tolower(needle[i]) == tolower(haystack[j])){
|
|
| 113 |
- if(!i){
|
|
| 109 |
+ if (tolower(needle[i]) == tolower(haystack[j])) {
|
|
| 110 |
+ if (!i) {
|
|
| 114 | 111 |
score = (j * SCORE_GAP_LEADING) + match_bonus[j]; |
| 115 |
- }else if(j){
|
|
| 112 |
+ } else if (j) {
|
|
| 116 | 113 |
score = max( |
| 117 |
- M[i-1][j-1] + match_bonus[j], |
|
| 114 |
+ M[i - 1][j - 1] + match_bonus[j], |
|
| 118 | 115 |
|
| 119 |
- /* consecutive match, doesn't stack with match_bonus */ |
|
| 120 |
- D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE |
|
| 121 |
- ); |
|
| 116 |
+ /* consecutive match, doesn't stack with match_bonus */ |
|
| 117 |
+ D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE); |
|
| 122 | 118 |
} |
| 123 | 119 |
} |
| 124 | 120 |
D[i][j] = score; |
| ... | ... |
@@ -134,10 +130,10 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 134 | 130 |
#endif |
| 135 | 131 |
|
| 136 | 132 |
/* backtrace to find the positions of optimal matching */ |
| 137 |
- if(positions){
|
|
| 133 |
+ if (positions) {
|
|
| 138 | 134 |
int match_required = 0; |
| 139 |
- for(int i = n-1, j = m-1; i >= 0; i--){
|
|
| 140 |
- for(; j >= 0; j--){
|
|
| 135 |
+ for (int i = n - 1, j = m - 1; i >= 0; i--) {
|
|
| 136 |
+ for (; j >= 0; j--) {
|
|
| 141 | 137 |
/* |
| 142 | 138 |
* There may be multiple paths which result in |
| 143 | 139 |
* the optimal weight. |
| ... | ... |
@@ -146,12 +142,15 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 146 | 142 |
* we encounter, the latest in the candidate |
| 147 | 143 |
* string. |
| 148 | 144 |
*/ |
| 149 |
- if(D[i][j] != SCORE_MIN && (match_required || D[i][j] == M[i][j])){
|
|
| 145 |
+ if (D[i][j] != SCORE_MIN && |
|
| 146 |
+ (match_required || D[i][j] == M[i][j])) {
|
|
| 150 | 147 |
/* If this score was determined using |
| 151 | 148 |
* SCORE_MATCH_CONSECUTIVE, the |
| 152 | 149 |
* previous character MUST be a match |
| 153 | 150 |
*/ |
| 154 |
- match_required = i && j && M[i][j] == D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE; |
|
| 151 |
+ match_required = |
|
| 152 |
+ i && j && |
|
| 153 |
+ M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; |
|
| 155 | 154 |
positions[i] = j--; |
| 156 | 155 |
break; |
| 157 | 156 |
} |
| ... | ... |
@@ -159,24 +158,24 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi |
| 159 | 158 |
} |
| 160 | 159 |
} |
| 161 | 160 |
|
| 162 |
- return M[n-1][m-1]; |
|
| 161 |
+ return M[n - 1][m - 1]; |
|
| 163 | 162 |
} |
| 164 | 163 |
|
| 165 |
-score_t match_positions(const char *needle, const char *haystack, size_t *positions){
|
|
| 166 |
- if(!*needle){
|
|
| 164 |
+score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
|
|
| 165 |
+ if (!*needle) {
|
|
| 167 | 166 |
return SCORE_MAX; |
| 168 |
- }else if(!strcasecmp(needle, haystack)){
|
|
| 169 |
- if(positions){
|
|
| 167 |
+ } else if (!strcasecmp(needle, haystack)) {
|
|
| 168 |
+ if (positions) {
|
|
| 170 | 169 |
int n = strlen(needle); |
| 171 |
- for(int i = 0; i < n; i++) |
|
| 170 |
+ for (int i = 0; i < n; i++) |
|
| 172 | 171 |
positions[i] = i; |
| 173 | 172 |
} |
| 174 | 173 |
return SCORE_MAX; |
| 175 |
- }else{
|
|
| 174 |
+ } else {
|
|
| 176 | 175 |
return calculate_score(needle, haystack, positions); |
| 177 | 176 |
} |
| 178 | 177 |
} |
| 179 | 178 |
|
| 180 |
-score_t match(const char *needle, const char *haystack){
|
|
| 179 |
+score_t match(const char *needle, const char *haystack) {
|
|
| 181 | 180 |
return match_positions(needle, haystack, NULL); |
| 182 | 181 |
} |
| ... | ... |
@@ -9,16 +9,19 @@ |
| 9 | 9 |
|
| 10 | 10 |
#include "config.h" |
| 11 | 11 |
|
| 12 |
+char *strcasechr(const char *s, char c){
|
|
| 13 |
+ const char accept[3] = {c, toupper(c), 0};
|
|
| 14 |
+ return strpbrk(s, accept); |
|
| 15 |
+} |
|
| 16 |
+ |
|
| 12 | 17 |
int has_match(const char *needle, const char *haystack){
|
| 13 | 18 |
while(*needle){
|
| 14 |
- char nch = tolower(*needle++); |
|
| 15 |
- for(;;){
|
|
| 16 |
- char ch = *haystack++; |
|
| 17 |
- if(!ch) |
|
| 18 |
- return 0; |
|
| 19 |
- else if(nch == tolower(ch)) |
|
| 20 |
- break; |
|
| 19 |
+ char nch = *needle++; |
|
| 20 |
+ |
|
| 21 |
+ if(!(haystack = strcasechr(haystack, nch))){
|
|
| 22 |
+ return 0; |
|
| 21 | 23 |
} |
| 24 |
+ haystack++; |
|
| 22 | 25 |
} |
| 23 | 26 |
return 1; |
| 24 | 27 |
} |
| ... | ... |
@@ -50,7 +50,7 @@ void mat_print(score_t *mat, const char *needle, const char *haystack){
|
| 50 | 50 |
fprintf(stderr, "\n\n"); |
| 51 | 51 |
} |
| 52 | 52 |
|
| 53 |
-double calculate_score(const char *needle, const char *haystack, size_t *positions){
|
|
| 53 |
+score_t calculate_score(const char *needle, const char *haystack, size_t *positions){
|
|
| 54 | 54 |
if(!*haystack || !*needle) |
| 55 | 55 |
return SCORE_MIN; |
| 56 | 56 |
|
| ... | ... |
@@ -102,8 +102,8 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 102 | 102 |
} |
| 103 | 103 |
|
| 104 | 104 |
for(int i = 0; i < n; i++){
|
| 105 |
- double prev_score = SCORE_MIN; |
|
| 106 |
- double gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; |
|
| 105 |
+ score_t prev_score = SCORE_MIN; |
|
| 106 |
+ score_t gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; |
|
| 107 | 107 |
for(int j = 0; j < m; j++){
|
| 108 | 108 |
score_t score = SCORE_MIN; |
| 109 | 109 |
if(tolower(needle[i]) == tolower(haystack[j])){
|
| ... | ... |
@@ -159,7 +159,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 159 | 159 |
return M[n-1][m-1]; |
| 160 | 160 |
} |
| 161 | 161 |
|
| 162 |
-double match_positions(const char *needle, const char *haystack, size_t *positions){
|
|
| 162 |
+score_t match_positions(const char *needle, const char *haystack, size_t *positions){
|
|
| 163 | 163 |
if(!*needle){
|
| 164 | 164 |
return SCORE_MAX; |
| 165 | 165 |
}else if(!strcasecmp(needle, haystack)){
|
| ... | ... |
@@ -174,6 +174,6 @@ double match_positions(const char *needle, const char *haystack, size_t *positio |
| 174 | 174 |
} |
| 175 | 175 |
} |
| 176 | 176 |
|
| 177 |
-double match(const char *needle, const char *haystack){
|
|
| 177 |
+score_t match(const char *needle, const char *haystack){
|
|
| 178 | 178 |
return match_positions(needle, haystack, NULL); |
| 179 | 179 |
} |
| ... | ... |
@@ -24,9 +24,6 @@ int has_match(const char *needle, const char *haystack){
|
| 24 | 24 |
} |
| 25 | 25 |
|
| 26 | 26 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 27 |
-typedef double score_t; |
|
| 28 |
-#define SCORE_MAX INFINITY |
|
| 29 |
-#define SCORE_MIN -INFINITY |
|
| 30 | 27 |
|
| 31 | 28 |
/* print one of the internal matrices */ |
| 32 | 29 |
void mat_print(score_t *mat, const char *needle, const char *haystack){
|
| ... | ... |
@@ -7,6 +7,8 @@ |
| 7 | 7 |
|
| 8 | 8 |
#include "match.h" |
| 9 | 9 |
|
| 10 |
+#include "config.h" |
|
| 11 |
+ |
|
| 10 | 12 |
int has_match(const char *needle, const char *haystack){
|
| 11 | 13 |
while(*needle){
|
| 12 | 14 |
char nch = tolower(*needle++); |
| ... | ... |
@@ -75,15 +77,6 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 75 | 77 |
* M[][] Stores the best possible score at this position. |
| 76 | 78 |
*/ |
| 77 | 79 |
|
| 78 |
-#define SCORE_GAP_LEADING -0.005 |
|
| 79 |
-#define SCORE_GAP_TRAILING -0.005 |
|
| 80 |
-#define SCORE_GAP_INNER -0.01 |
|
| 81 |
-#define SCORE_MATCH_CONSECUTIVE 1.0 |
|
| 82 |
-#define SCORE_MATCH_SLASH 0.9 |
|
| 83 |
-#define SCORE_MATCH_WORD 0.8 |
|
| 84 |
-#define SCORE_MATCH_CAPITAL 0.7 |
|
| 85 |
-#define SCORE_MATCH_DOT 0.6 |
|
| 86 |
- |
|
| 87 | 80 |
/* Which positions are beginning of words */ |
| 88 | 81 |
char last_ch = '\0'; |
| 89 | 82 |
for(int i = 0; i < m; i++){
|
| ... | ... |
@@ -172,8 +172,6 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 172 | 172 |
double match_positions(const char *needle, const char *haystack, size_t *positions){
|
| 173 | 173 |
if(!*needle){
|
| 174 | 174 |
return SCORE_MAX; |
| 175 |
- }else if(!has_match(needle, haystack)){
|
|
| 176 |
- return SCORE_MIN; |
|
| 177 | 175 |
}else if(!strcasecmp(needle, haystack)){
|
| 178 | 176 |
if(positions){
|
| 179 | 177 |
int n = strlen(needle); |
| ... | ... |
@@ -9,10 +9,14 @@ |
| 9 | 9 |
|
| 10 | 10 |
int has_match(const char *needle, const char *haystack){
|
| 11 | 11 |
while(*needle){
|
| 12 |
- if(!*haystack) |
|
| 13 |
- return 0; |
|
| 14 |
- while(*haystack && tolower(*needle) == tolower(*haystack++)) |
|
| 15 |
- needle++; |
|
| 12 |
+ char nch = tolower(*needle++); |
|
| 13 |
+ for(;;){
|
|
| 14 |
+ char ch = *haystack++; |
|
| 15 |
+ if(!ch) |
|
| 16 |
+ return 0; |
|
| 17 |
+ else if(nch == tolower(ch)) |
|
| 18 |
+ break; |
|
| 19 |
+ } |
|
| 16 | 20 |
} |
| 17 | 21 |
return 1; |
| 18 | 22 |
} |
| ... | ... |
@@ -65,8 +65,6 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 65 | 65 |
|
| 66 | 66 |
score_t match_bonus[m]; |
| 67 | 67 |
score_t D[n][m], M[n][m]; |
| 68 |
- bzero(D, sizeof(D)); |
|
| 69 |
- bzero(M, sizeof(M)); |
|
| 70 | 68 |
|
| 71 | 69 |
/* |
| 72 | 70 |
* D[][] Stores the best score for this position ending with a match. |
| ... | ... |
@@ -118,16 +118,16 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 118 | 118 |
if(!i){
|
| 119 | 119 |
score = (j * SCORE_GAP_LEADING) + match_bonus[j]; |
| 120 | 120 |
}else if(j){
|
| 121 |
- score = max(score, M[i-1][j-1] + match_bonus[j]); |
|
| 121 |
+ score = max( |
|
| 122 |
+ M[i-1][j-1] + match_bonus[j], |
|
| 122 | 123 |
|
| 123 |
- /* consecutive match, doesn't stack with match_bonus */ |
|
| 124 |
- score = max(score, D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE); |
|
| 124 |
+ /* consecutive match, doesn't stack with match_bonus */ |
|
| 125 |
+ D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE |
|
| 126 |
+ ); |
|
| 125 | 127 |
} |
| 126 | 128 |
} |
| 127 | 129 |
D[i][j] = score; |
| 128 |
- score = max(score, prev_score + gap_score); |
|
| 129 |
- M[i][j] = score; |
|
| 130 |
- prev_score = score; |
|
| 130 |
+ M[i][j] = prev_score = max(score, prev_score + gap_score); |
|
| 131 | 131 |
} |
| 132 | 132 |
} |
| 133 | 133 |
|
| ... | ... |
@@ -111,6 +111,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 111 | 111 |
|
| 112 | 112 |
for(int i = 0; i < n; i++){
|
| 113 | 113 |
double prev_score = SCORE_MIN; |
| 114 |
+ double gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; |
|
| 114 | 115 |
for(int j = 0; j < m; j++){
|
| 115 | 116 |
score_t score = SCORE_MIN; |
| 116 | 117 |
if(tolower(needle[i]) == tolower(haystack[j])){
|
| ... | ... |
@@ -124,13 +125,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 124 | 125 |
} |
| 125 | 126 |
} |
| 126 | 127 |
D[i][j] = score; |
| 127 |
- if(j){
|
|
| 128 |
- if(i == n-1){
|
|
| 129 |
- score = max(score, prev_score + SCORE_GAP_TRAILING); |
|
| 130 |
- }else{
|
|
| 131 |
- score = max(score, prev_score + SCORE_GAP_INNER); |
|
| 132 |
- } |
|
| 133 |
- } |
|
| 128 |
+ score = max(score, prev_score + gap_score); |
|
| 134 | 129 |
M[i][j] = score; |
| 135 | 130 |
prev_score = score; |
| 136 | 131 |
} |
| ... | ... |
@@ -110,6 +110,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 110 | 110 |
} |
| 111 | 111 |
|
| 112 | 112 |
for(int i = 0; i < n; i++){
|
| 113 |
+ double prev_score = SCORE_MIN; |
|
| 113 | 114 |
for(int j = 0; j < m; j++){
|
| 114 | 115 |
score_t score = SCORE_MIN; |
| 115 | 116 |
if(tolower(needle[i]) == tolower(haystack[j])){
|
| ... | ... |
@@ -125,12 +126,13 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 125 | 126 |
D[i][j] = score; |
| 126 | 127 |
if(j){
|
| 127 | 128 |
if(i == n-1){
|
| 128 |
- score = max(score, M[i][j-1] + SCORE_GAP_TRAILING); |
|
| 129 |
+ score = max(score, prev_score + SCORE_GAP_TRAILING); |
|
| 129 | 130 |
}else{
|
| 130 |
- score = max(score, M[i][j-1] + SCORE_GAP_INNER); |
|
| 131 |
+ score = max(score, prev_score + SCORE_GAP_INNER); |
|
| 131 | 132 |
} |
| 132 | 133 |
} |
| 133 | 134 |
M[i][j] = score; |
| 135 |
+ prev_score = score; |
|
| 134 | 136 |
} |
| 135 | 137 |
} |
| 136 | 138 |
|
| ... | ... |
@@ -112,19 +112,17 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 112 | 112 |
for(int i = 0; i < n; i++){
|
| 113 | 113 |
for(int j = 0; j < m; j++){
|
| 114 | 114 |
score_t score = SCORE_MIN; |
| 115 |
- int match = tolower(needle[i]) == tolower(haystack[j]); |
|
| 116 |
- D[i][j] = SCORE_MIN; |
|
| 117 |
- if(match){
|
|
| 118 |
- if(i && j){
|
|
| 115 |
+ if(tolower(needle[i]) == tolower(haystack[j])){
|
|
| 116 |
+ if(!i){
|
|
| 117 |
+ score = (j * SCORE_GAP_LEADING) + match_bonus[j]; |
|
| 118 |
+ }else if(j){
|
|
| 119 | 119 |
score = max(score, M[i-1][j-1] + match_bonus[j]); |
| 120 | 120 |
|
| 121 | 121 |
/* consecutive match, doesn't stack with match_bonus */ |
| 122 | 122 |
score = max(score, D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE); |
| 123 |
- }else if(!i){
|
|
| 124 |
- score = (j * SCORE_GAP_LEADING) + match_bonus[j]; |
|
| 125 | 123 |
} |
| 126 |
- D[i][j] = score; |
|
| 127 | 124 |
} |
| 125 |
+ D[i][j] = score; |
|
| 128 | 126 |
if(j){
|
| 129 | 127 |
if(i == n-1){
|
| 130 | 128 |
score = max(score, M[i][j-1] + SCORE_GAP_TRAILING); |
| ... | ... |
@@ -77,10 +77,10 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 77 | 77 |
#define SCORE_GAP_TRAILING -0.005 |
| 78 | 78 |
#define SCORE_GAP_INNER -0.01 |
| 79 | 79 |
#define SCORE_MATCH_CONSECUTIVE 1.0 |
| 80 |
-#define SCORE_MATCH_SLASH 1.5 |
|
| 81 |
-#define SCORE_MATCH_WORD 1.2 |
|
| 82 |
-#define SCORE_MATCH_CAPITAL 1.1 |
|
| 83 |
-#define SCORE_MATCH_DOT 0.8 |
|
| 80 |
+#define SCORE_MATCH_SLASH 0.9 |
|
| 81 |
+#define SCORE_MATCH_WORD 0.8 |
|
| 82 |
+#define SCORE_MATCH_CAPITAL 0.7 |
|
| 83 |
+#define SCORE_MATCH_DOT 0.6 |
|
| 84 | 84 |
|
| 85 | 85 |
/* Which positions are beginning of words */ |
| 86 | 86 |
char last_ch = '\0'; |
| ... | ... |
@@ -145,6 +145,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 145 | 145 |
|
| 146 | 146 |
/* backtrace to find the positions of optimal matching */ |
| 147 | 147 |
if(positions){
|
| 148 |
+ int match_required = 0; |
|
| 148 | 149 |
for(int i = n-1, j = m-1; i >= 0; i--){
|
| 149 | 150 |
for(; j >= 0; j--){
|
| 150 | 151 |
/* |
| ... | ... |
@@ -155,7 +156,12 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 155 | 156 |
* we encounter, the latest in the candidate |
| 156 | 157 |
* string. |
| 157 | 158 |
*/ |
| 158 |
- if(D[i][j] != SCORE_MIN && D[i][j] == M[i][j]){
|
|
| 159 |
+ if(D[i][j] != SCORE_MIN && (match_required || D[i][j] == M[i][j])){
|
|
| 160 |
+ /* If this score was determined using |
|
| 161 |
+ * SCORE_MATCH_CONSECUTIVE, the |
|
| 162 |
+ * previous character MUST be a match |
|
| 163 |
+ */ |
|
| 164 |
+ match_required = i && j && M[i][j] == D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE; |
|
| 159 | 165 |
positions[i] = j--; |
| 160 | 166 |
break; |
| 161 | 167 |
} |
| ... | ... |
@@ -155,7 +155,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 155 | 155 |
* we encounter, the latest in the candidate |
| 156 | 156 |
* string. |
| 157 | 157 |
*/ |
| 158 |
- if(tolower(needle[i]) == tolower(haystack[j]) && D[i][j] == M[i][j]){
|
|
| 158 |
+ if(D[i][j] != SCORE_MIN && D[i][j] == M[i][j]){
|
|
| 159 | 159 |
positions[i] = j--; |
| 160 | 160 |
break; |
| 161 | 161 |
} |
| ... | ... |
@@ -111,7 +111,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 111 | 111 |
|
| 112 | 112 |
for(int i = 0; i < n; i++){
|
| 113 | 113 |
for(int j = 0; j < m; j++){
|
| 114 |
- score_t score = (j || i) ? SCORE_MIN : 0; |
|
| 114 |
+ score_t score = SCORE_MIN; |
|
| 115 | 115 |
int match = tolower(needle[i]) == tolower(haystack[j]); |
| 116 | 116 |
D[i][j] = SCORE_MIN; |
| 117 | 117 |
if(match){
|
| ... | ... |
@@ -23,9 +23,17 @@ typedef double score_t; |
| 23 | 23 |
#define SCORE_MIN -INFINITY |
| 24 | 24 |
|
| 25 | 25 |
/* print one of the internal matrices */ |
| 26 |
-void mat_print(score_t *mat, int n, int m){
|
|
| 26 |
+void mat_print(score_t *mat, const char *needle, const char *haystack){
|
|
| 27 |
+ int n = strlen(needle); |
|
| 28 |
+ int m = strlen(haystack); |
|
| 27 | 29 |
int i, j; |
| 30 |
+ fprintf(stderr, " "); |
|
| 31 |
+ for(j = 0; j < m; j++){
|
|
| 32 |
+ fprintf(stderr, " %c", haystack[j]); |
|
| 33 |
+ } |
|
| 34 |
+ fprintf(stderr, "\n"); |
|
| 28 | 35 |
for(i = 0; i < n; i++){
|
| 36 |
+ fprintf(stderr, " %c |", needle[i]); |
|
| 29 | 37 |
for(j = 0; j < m; j++){
|
| 30 | 38 |
score_t val = mat[i*m + j]; |
| 31 | 39 |
if(val == SCORE_MIN){
|
| ... | ... |
@@ -129,10 +137,10 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 129 | 137 |
} |
| 130 | 138 |
|
| 131 | 139 |
#if 0 |
| 132 |
- printf("\"%s\" =~ \"%s\"\n", needle, haystack);
|
|
| 133 |
- mat_print(&D[0][0], n, m); |
|
| 134 |
- mat_print(&M[0][0], n, m); |
|
| 135 |
- printf("\n");
|
|
| 140 |
+ fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack); |
|
| 141 |
+ mat_print(&D[0][0], needle, haystack); |
|
| 142 |
+ mat_print(&M[0][0], needle, haystack); |
|
| 143 |
+ fprintf(stderr, "\n"); |
|
| 136 | 144 |
#endif |
| 137 | 145 |
|
| 138 | 146 |
/* backtrace to find the positions of optimal matching */ |
| ... | ... |
@@ -52,7 +52,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 52 | 52 |
* If it is a valid match it will still be returned, it will |
| 53 | 53 |
* just be ranked below any reasonably sized candidates |
| 54 | 54 |
*/ |
| 55 |
- return 0; |
|
| 55 |
+ return SCORE_MIN; |
|
| 56 | 56 |
} |
| 57 | 57 |
|
| 58 | 58 |
score_t match_bonus[m]; |
Change-Id: Iea655e766abdaec8e7f3e2c3aa5d23274636cfb9
| ... | ... |
@@ -155,7 +155,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 155 | 155 |
} |
| 156 | 156 |
} |
| 157 | 157 |
|
| 158 |
- return (float)(M[n-1][m-1]) / (float)(n * 2 + 1); |
|
| 158 |
+ return M[n-1][m-1]; |
|
| 159 | 159 |
} |
| 160 | 160 |
|
| 161 | 161 |
double match_positions(const char *needle, const char *haystack, size_t *positions){
|
| ... | ... |
@@ -103,7 +103,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 103 | 103 |
|
| 104 | 104 |
for(int i = 0; i < n; i++){
|
| 105 | 105 |
for(int j = 0; j < m; j++){
|
| 106 |
- score_t score = j ? SCORE_MIN : 0; |
|
| 106 |
+ score_t score = (j || i) ? SCORE_MIN : 0; |
|
| 107 | 107 |
int match = tolower(needle[i]) == tolower(haystack[j]); |
| 108 | 108 |
D[i][j] = SCORE_MIN; |
| 109 | 109 |
if(match){
|
| ... | ... |
@@ -81,7 +81,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 81 | 81 |
|
| 82 | 82 |
score_t score = 0; |
| 83 | 83 |
if(isalnum(ch)){
|
| 84 |
- if(last_ch == '/'){
|
|
| 84 |
+ if(!last_ch || last_ch == '/'){
|
|
| 85 | 85 |
score = SCORE_MATCH_SLASH; |
| 86 | 86 |
}else if(last_ch == '-' || |
| 87 | 87 |
last_ch == '_' || |
| ... | ... |
@@ -29,9 +29,9 @@ void mat_print(score_t *mat, int n, int m){
|
| 29 | 29 |
for(j = 0; j < m; j++){
|
| 30 | 30 |
score_t val = mat[i*m + j]; |
| 31 | 31 |
if(val == SCORE_MIN){
|
| 32 |
- fprintf(stderr, " -inf"); |
|
| 32 |
+ fprintf(stderr, " -inf"); |
|
| 33 | 33 |
}else{
|
| 34 |
- fprintf(stderr, " %.2f", val); |
|
| 34 |
+ fprintf(stderr, " % .2f", val); |
|
| 35 | 35 |
} |
| 36 | 36 |
} |
| 37 | 37 |
fprintf(stderr, "\n"); |
| ... | ... |
@@ -65,8 +65,8 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 65 | 65 |
* M[][] Stores the best possible score at this position. |
| 66 | 66 |
*/ |
| 67 | 67 |
|
| 68 |
-#define SCORE_GAP_LEADING -0.01 |
|
| 69 |
-#define SCORE_GAP_TRAILING -0.01 |
|
| 68 |
+#define SCORE_GAP_LEADING -0.005 |
|
| 69 |
+#define SCORE_GAP_TRAILING -0.005 |
|
| 70 | 70 |
#define SCORE_GAP_INNER -0.01 |
| 71 | 71 |
#define SCORE_MATCH_CONSECUTIVE 1.0 |
| 72 | 72 |
#define SCORE_MATCH_SLASH 1.5 |
| ... | ... |
@@ -129,8 +129,10 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 129 | 129 |
} |
| 130 | 130 |
|
| 131 | 131 |
#if 0 |
| 132 |
+ printf("\"%s\" =~ \"%s\"\n", needle, haystack);
|
|
| 132 | 133 |
mat_print(&D[0][0], n, m); |
| 133 | 134 |
mat_print(&M[0][0], n, m); |
| 135 |
+ printf("\n");
|
|
| 134 | 136 |
#endif |
| 135 | 137 |
|
| 136 | 138 |
/* backtrace to find the positions of optimal matching */ |
| ... | ... |
@@ -65,6 +65,15 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 65 | 65 |
* M[][] Stores the best possible score at this position. |
| 66 | 66 |
*/ |
| 67 | 67 |
|
| 68 |
+#define SCORE_GAP_LEADING -0.01 |
|
| 69 |
+#define SCORE_GAP_TRAILING -0.01 |
|
| 70 |
+#define SCORE_GAP_INNER -0.01 |
|
| 71 |
+#define SCORE_MATCH_CONSECUTIVE 1.0 |
|
| 72 |
+#define SCORE_MATCH_SLASH 1.5 |
|
| 73 |
+#define SCORE_MATCH_WORD 1.2 |
|
| 74 |
+#define SCORE_MATCH_CAPITAL 1.1 |
|
| 75 |
+#define SCORE_MATCH_DOT 0.8 |
|
| 76 |
+ |
|
| 68 | 77 |
/* Which positions are beginning of words */ |
| 69 | 78 |
char last_ch = '\0'; |
| 70 | 79 |
for(int i = 0; i < m; i++){
|
| ... | ... |
@@ -73,18 +82,18 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 73 | 82 |
score_t score = 0; |
| 74 | 83 |
if(isalnum(ch)){
|
| 75 | 84 |
if(last_ch == '/'){
|
| 76 |
- score = 1.5; |
|
| 85 |
+ score = SCORE_MATCH_SLASH; |
|
| 77 | 86 |
}else if(last_ch == '-' || |
| 78 | 87 |
last_ch == '_' || |
| 79 | 88 |
last_ch == ' ' || |
| 80 | 89 |
(last_ch >= '0' && last_ch <= '9')){
|
| 81 |
- score = 1.2; |
|
| 90 |
+ score = SCORE_MATCH_WORD; |
|
| 82 | 91 |
}else if(last_ch >= 'a' && last_ch <= 'z' && |
| 83 | 92 |
ch >= 'A' && ch <= 'Z'){
|
| 84 | 93 |
/* CamelCase */ |
| 85 |
- score = 1.1; |
|
| 94 |
+ score = SCORE_MATCH_CAPITAL; |
|
| 86 | 95 |
}else if(last_ch == '.'){
|
| 87 |
- score = 0.8; |
|
| 96 |
+ score = SCORE_MATCH_DOT; |
|
| 88 | 97 |
} |
| 89 | 98 |
} |
| 90 | 99 |
|
| ... | ... |
@@ -102,14 +111,18 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 102 | 111 |
score = max(score, M[i-1][j-1] + match_bonus[j]); |
| 103 | 112 |
|
| 104 | 113 |
/* consecutive match, doesn't stack with match_bonus */ |
| 105 |
- score = max(score, D[i-1][j-1] + 1.0); |
|
| 114 |
+ score = max(score, D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE); |
|
| 106 | 115 |
}else if(!i){
|
| 107 |
- score = (j * -0.01) + match_bonus[j]; |
|
| 116 |
+ score = (j * SCORE_GAP_LEADING) + match_bonus[j]; |
|
| 108 | 117 |
} |
| 109 | 118 |
D[i][j] = score; |
| 110 | 119 |
} |
| 111 | 120 |
if(j){
|
| 112 |
- score = max(score, M[i][j-1] - 0.01); |
|
| 121 |
+ if(i == n-1){
|
|
| 122 |
+ score = max(score, M[i][j-1] + SCORE_GAP_TRAILING); |
|
| 123 |
+ }else{
|
|
| 124 |
+ score = max(score, M[i][j-1] + SCORE_GAP_INNER); |
|
| 125 |
+ } |
|
| 113 | 126 |
} |
| 114 | 127 |
M[i][j] = score; |
| 115 | 128 |
} |
| ... | ... |
@@ -3,6 +3,7 @@ |
| 3 | 3 |
#include <strings.h> |
| 4 | 4 |
#include <stdio.h> |
| 5 | 5 |
#include <float.h> |
| 6 |
+#include <math.h> |
|
| 6 | 7 |
|
| 7 | 8 |
#include "fzy.h" |
| 8 | 9 |
|
| ... | ... |
@@ -18,8 +19,8 @@ int has_match(const char *needle, const char *haystack){
|
| 18 | 19 |
|
| 19 | 20 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 20 | 21 |
typedef double score_t; |
| 21 |
-#define SCORE_MAX DBL_MAX |
|
| 22 |
-#define SCORE_MIN -DBL_MAX |
|
| 22 |
+#define SCORE_MAX INFINITY |
|
| 23 |
+#define SCORE_MIN -INFINITY |
|
| 23 | 24 |
|
| 24 | 25 |
/* print one of the internal matrices */ |
| 25 | 26 |
void mat_print(score_t *mat, int n, int m){
|
| ... | ... |
@@ -65,34 +66,52 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 65 | 66 |
*/ |
| 66 | 67 |
|
| 67 | 68 |
/* Which positions are beginning of words */ |
| 68 |
- int at_bow = 1; |
|
| 69 | 69 |
char last_ch = '\0'; |
| 70 | 70 |
for(int i = 0; i < m; i++){
|
| 71 | 71 |
char ch = haystack[i]; |
| 72 |
- /* TODO: What about allcaps (ex. README) */ |
|
| 73 |
- int bow = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch)); |
|
| 74 |
- at_bow = !isalnum(ch); |
|
| 75 |
- last_ch = ch; |
|
| 76 | 72 |
|
| 77 |
- match_bonus[i] = bow ? 1.5 : 0; |
|
| 73 |
+ score_t score = 0; |
|
| 74 |
+ if(isalnum(ch)){
|
|
| 75 |
+ if(last_ch == '/'){
|
|
| 76 |
+ score = 1.5; |
|
| 77 |
+ }else if(last_ch == '-' || |
|
| 78 |
+ last_ch == '_' || |
|
| 79 |
+ last_ch == ' ' || |
|
| 80 |
+ (last_ch >= '0' && last_ch <= '9')){
|
|
| 81 |
+ score = 1.2; |
|
| 82 |
+ }else if(last_ch >= 'a' && last_ch <= 'z' && |
|
| 83 |
+ ch >= 'A' && ch <= 'Z'){
|
|
| 84 |
+ /* CamelCase */ |
|
| 85 |
+ score = 1.1; |
|
| 86 |
+ }else if(last_ch == '.'){
|
|
| 87 |
+ score = 0.8; |
|
| 88 |
+ } |
|
| 89 |
+ } |
|
| 90 |
+ |
|
| 91 |
+ match_bonus[i] = score; |
|
| 92 |
+ last_ch = ch; |
|
| 78 | 93 |
} |
| 79 | 94 |
|
| 80 | 95 |
for(int i = 0; i < n; i++){
|
| 81 | 96 |
for(int j = 0; j < m; j++){
|
| 82 |
- D[i][j] = SCORE_MIN; |
|
| 97 |
+ score_t score = j ? SCORE_MIN : 0; |
|
| 83 | 98 |
int match = tolower(needle[i]) == tolower(haystack[j]); |
| 99 |
+ D[i][j] = SCORE_MIN; |
|
| 84 | 100 |
if(match){
|
| 85 |
- score_t score = 0; |
|
| 86 | 101 |
if(i && j){
|
| 87 |
- score = M[i-1][j-1] + match_bonus[j]; |
|
| 102 |
+ score = max(score, M[i-1][j-1] + match_bonus[j]); |
|
| 88 | 103 |
|
| 89 | 104 |
/* consecutive match, doesn't stack with match_bonus */ |
| 90 |
- score = max(score, 1 + D[i-1][j-1]); |
|
| 105 |
+ score = max(score, D[i-1][j-1] + 1.0); |
|
| 106 |
+ }else if(!i){
|
|
| 107 |
+ score = (j * -0.01) + match_bonus[j]; |
|
| 91 | 108 |
} |
| 92 |
- M[i][j] = D[i][j] = score; |
|
| 109 |
+ D[i][j] = score; |
|
| 110 |
+ } |
|
| 111 |
+ if(j){
|
|
| 112 |
+ score = max(score, M[i][j-1] - 0.01); |
|
| 93 | 113 |
} |
| 94 |
- if(j) |
|
| 95 |
- M[i][j] = max(M[i][j], M[i][j-1] - 0.05); |
|
| 114 |
+ M[i][j] = score; |
|
| 96 | 115 |
} |
| 97 | 116 |
} |
| 98 | 117 |
|
| ... | ... |
@@ -113,8 +132,8 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 113 | 132 |
* we encounter, the latest in the candidate |
| 114 | 133 |
* string. |
| 115 | 134 |
*/ |
| 116 |
- if(D[i][j] == M[i][j]){
|
|
| 117 |
- positions[i] = j; |
|
| 135 |
+ if(tolower(needle[i]) == tolower(haystack[j]) && D[i][j] == M[i][j]){
|
|
| 136 |
+ positions[i] = j--; |
|
| 118 | 137 |
break; |
| 119 | 138 |
} |
| 120 | 139 |
} |
| ... | ... |
@@ -54,7 +54,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 54 | 54 |
return 0; |
| 55 | 55 |
} |
| 56 | 56 |
|
| 57 |
- int bow[m]; |
|
| 57 |
+ score_t match_bonus[m]; |
|
| 58 | 58 |
score_t D[n][m], M[n][m]; |
| 59 | 59 |
bzero(D, sizeof(D)); |
| 60 | 60 |
bzero(M, sizeof(M)); |
| ... | ... |
@@ -70,9 +70,11 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 70 | 70 |
for(int i = 0; i < m; i++){
|
| 71 | 71 |
char ch = haystack[i]; |
| 72 | 72 |
/* TODO: What about allcaps (ex. README) */ |
| 73 |
- bow[i] = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch)); |
|
| 73 |
+ int bow = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch)); |
|
| 74 | 74 |
at_bow = !isalnum(ch); |
| 75 | 75 |
last_ch = ch; |
| 76 |
+ |
|
| 77 |
+ match_bonus[i] = bow ? 1.5 : 0; |
|
| 76 | 78 |
} |
| 77 | 79 |
|
| 78 | 80 |
for(int i = 0; i < n; i++){
|
| ... | ... |
@@ -81,12 +83,12 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 81 | 83 |
int match = tolower(needle[i]) == tolower(haystack[j]); |
| 82 | 84 |
if(match){
|
| 83 | 85 |
score_t score = 0; |
| 84 |
- if(i && j) |
|
| 85 |
- score = M[i-1][j-1]; |
|
| 86 |
- if(bow[j]) |
|
| 87 |
- score += 1.5; |
|
| 88 |
- else if(i && j && D[i-1][j-1]) |
|
| 86 |
+ if(i && j){
|
|
| 87 |
+ score = M[i-1][j-1] + match_bonus[j]; |
|
| 88 |
+ |
|
| 89 |
+ /* consecutive match, doesn't stack with match_bonus */ |
|
| 89 | 90 |
score = max(score, 1 + D[i-1][j-1]); |
| 91 |
+ } |
|
| 90 | 92 |
M[i][j] = D[i][j] = score; |
| 91 | 93 |
} |
| 92 | 94 |
if(j) |
| ... | ... |
@@ -16,23 +16,28 @@ int has_match(const char *needle, const char *haystack){
|
| 16 | 16 |
return 1; |
| 17 | 17 |
} |
| 18 | 18 |
|
| 19 |
+#define max(a, b) (((a) > (b)) ? (a) : (b)) |
|
| 20 |
+typedef double score_t; |
|
| 21 |
+#define SCORE_MAX DBL_MAX |
|
| 22 |
+#define SCORE_MIN -DBL_MAX |
|
| 23 |
+ |
|
| 19 | 24 |
/* print one of the internal matrices */ |
| 20 |
-void mat_print(int *mat, int n, int m){
|
|
| 25 |
+void mat_print(score_t *mat, int n, int m){
|
|
| 21 | 26 |
int i, j; |
| 22 | 27 |
for(i = 0; i < n; i++){
|
| 23 | 28 |
for(j = 0; j < m; j++){
|
| 24 |
- fprintf(stderr, " %3i", mat[i*m + j]); |
|
| 29 |
+ score_t val = mat[i*m + j]; |
|
| 30 |
+ if(val == SCORE_MIN){
|
|
| 31 |
+ fprintf(stderr, " -inf"); |
|
| 32 |
+ }else{
|
|
| 33 |
+ fprintf(stderr, " %.2f", val); |
|
| 34 |
+ } |
|
| 25 | 35 |
} |
| 26 | 36 |
fprintf(stderr, "\n"); |
| 27 | 37 |
} |
| 28 | 38 |
fprintf(stderr, "\n\n"); |
| 29 | 39 |
} |
| 30 | 40 |
|
| 31 |
-#define max(a, b) (((a) > (b)) ? (a) : (b)) |
|
| 32 |
-typedef double score_t; |
|
| 33 |
-#define SCORE_MAX DBL_MAX |
|
| 34 |
-#define SCORE_MIN -DBL_MAX |
|
| 35 |
- |
|
| 36 | 41 |
double calculate_score(const char *needle, const char *haystack, size_t *positions){
|
| 37 | 42 |
if(!*haystack || !*needle) |
| 38 | 43 |
return SCORE_MIN; |
| ... | ... |
@@ -89,6 +94,11 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 89 | 94 |
} |
| 90 | 95 |
} |
| 91 | 96 |
|
| 97 |
+#if 0 |
|
| 98 |
+ mat_print(&D[0][0], n, m); |
|
| 99 |
+ mat_print(&M[0][0], n, m); |
|
| 100 |
+#endif |
|
| 101 |
+ |
|
| 92 | 102 |
/* backtrace to find the positions of optimal matching */ |
| 93 | 103 |
if(positions){
|
| 94 | 104 |
for(int i = n-1, j = m-1; i >= 0; i--){
|
-0.05 for a skipped character in the candidate.
+1 for a match following a previous match
+1.5 for a match at the beginning of a word
No change for any other match.
| ... | ... |
@@ -29,7 +29,7 @@ void mat_print(int *mat, int n, int m){
|
| 29 | 29 |
} |
| 30 | 30 |
|
| 31 | 31 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 32 |
-typedef int score_t; |
|
| 32 |
+typedef double score_t; |
|
| 33 | 33 |
#define SCORE_MAX DBL_MAX |
| 34 | 34 |
#define SCORE_MIN -DBL_MAX |
| 35 | 35 |
|
| ... | ... |
@@ -50,7 +50,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 50 | 50 |
} |
| 51 | 51 |
|
| 52 | 52 |
int bow[m]; |
| 53 |
- int D[n][m], M[n][m]; |
|
| 53 |
+ score_t D[n][m], M[n][m]; |
|
| 54 | 54 |
bzero(D, sizeof(D)); |
| 55 | 55 |
bzero(M, sizeof(M)); |
| 56 | 56 |
|
| ... | ... |
@@ -72,39 +72,38 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 72 | 72 |
|
| 73 | 73 |
for(int i = 0; i < n; i++){
|
| 74 | 74 |
for(int j = 0; j < m; j++){
|
| 75 |
+ D[i][j] = SCORE_MIN; |
|
| 75 | 76 |
int match = tolower(needle[i]) == tolower(haystack[j]); |
| 76 | 77 |
if(match){
|
| 77 | 78 |
score_t score = 0; |
| 78 | 79 |
if(i && j) |
| 79 | 80 |
score = M[i-1][j-1]; |
| 80 | 81 |
if(bow[j]) |
| 81 |
- score += 2; |
|
| 82 |
+ score += 1.5; |
|
| 82 | 83 |
else if(i && j && D[i-1][j-1]) |
| 83 | 84 |
score = max(score, 1 + D[i-1][j-1]); |
| 84 | 85 |
M[i][j] = D[i][j] = score; |
| 85 | 86 |
} |
| 86 | 87 |
if(j) |
| 87 |
- M[i][j] = max(M[i][j], M[i][j-1]); |
|
| 88 |
+ M[i][j] = max(M[i][j], M[i][j-1] - 0.05); |
|
| 88 | 89 |
} |
| 89 | 90 |
} |
| 90 | 91 |
|
| 91 | 92 |
/* backtrace to find the positions of optimal matching */ |
| 92 | 93 |
if(positions){
|
| 93 | 94 |
for(int i = n-1, j = m-1; i >= 0; i--){
|
| 94 |
- int last = M[i][j]; |
|
| 95 |
- for(; j >= 0 && M[i][j] == last; j--){
|
|
| 95 |
+ for(; j >= 0; j--){
|
|
| 96 | 96 |
/* |
| 97 | 97 |
* There may be multiple paths which result in |
| 98 | 98 |
* the optimal weight. |
| 99 | 99 |
* |
| 100 |
- * Since we don't exit the loop on the first |
|
| 101 |
- * match, positions[i] may be assigned to |
|
| 102 |
- * multiple times. Since we are decrementing i |
|
| 103 |
- * and j, this favours the optimal path |
|
| 104 |
- * occurring earlier in the string. |
|
| 100 |
+ * For simplicity, we will pick the first one |
|
| 101 |
+ * we encounter, the latest in the candidate |
|
| 102 |
+ * string. |
|
| 105 | 103 |
*/ |
| 106 |
- if(tolower(needle[i]) == tolower(haystack[j])){
|
|
| 104 |
+ if(D[i][j] == M[i][j]){
|
|
| 107 | 105 |
positions[i] = j; |
| 106 |
+ break; |
|
| 108 | 107 |
} |
| 109 | 108 |
} |
| 110 | 109 |
} |
SCORE_MAX is now defined as DBL_MAX
SCORE_MIN is now defined as -DBL_MAX
| ... | ... |
@@ -2,11 +2,10 @@ |
| 2 | 2 |
#include <string.h> |
| 3 | 3 |
#include <strings.h> |
| 4 | 4 |
#include <stdio.h> |
| 5 |
+#include <float.h> |
|
| 5 | 6 |
|
| 6 | 7 |
#include "fzy.h" |
| 7 | 8 |
|
| 8 |
-#define SCORE_MIN -1 |
|
| 9 |
- |
|
| 10 | 9 |
int has_match(const char *needle, const char *haystack){
|
| 11 | 10 |
while(*needle){
|
| 12 | 11 |
if(!*haystack) |
| ... | ... |
@@ -31,6 +30,8 @@ void mat_print(int *mat, int n, int m){
|
| 31 | 30 |
|
| 32 | 31 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 33 | 32 |
typedef int score_t; |
| 33 |
+#define SCORE_MAX DBL_MAX |
|
| 34 |
+#define SCORE_MIN -DBL_MAX |
|
| 34 | 35 |
|
| 35 | 36 |
double calculate_score(const char *needle, const char *haystack, size_t *positions){
|
| 36 | 37 |
if(!*haystack || !*needle) |
| ... | ... |
@@ -114,7 +115,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 114 | 115 |
|
| 115 | 116 |
double match_positions(const char *needle, const char *haystack, size_t *positions){
|
| 116 | 117 |
if(!*needle){
|
| 117 |
- return 1.0; |
|
| 118 |
+ return SCORE_MAX; |
|
| 118 | 119 |
}else if(!has_match(needle, haystack)){
|
| 119 | 120 |
return SCORE_MIN; |
| 120 | 121 |
}else if(!strcasecmp(needle, haystack)){
|
| ... | ... |
@@ -123,7 +124,7 @@ double match_positions(const char *needle, const char *haystack, size_t *positio |
| 123 | 124 |
for(int i = 0; i < n; i++) |
| 124 | 125 |
positions[i] = i; |
| 125 | 126 |
} |
| 126 |
- return 1.0; |
|
| 127 |
+ return SCORE_MAX; |
|
| 127 | 128 |
}else{
|
| 128 | 129 |
return calculate_score(needle, haystack, positions); |
| 129 | 130 |
} |
Previously a successful match was determined by the score being
positive. Now we will use has_match instead.
| ... | ... |
@@ -7,7 +7,7 @@ |
| 7 | 7 |
|
| 8 | 8 |
#define SCORE_MIN -1 |
| 9 | 9 |
|
| 10 |
-static int is_subset(const char *needle, const char *haystack){
|
|
| 10 |
+int has_match(const char *needle, const char *haystack){
|
|
| 11 | 11 |
while(*needle){
|
| 12 | 12 |
if(!*haystack) |
| 13 | 13 |
return 0; |
| ... | ... |
@@ -115,7 +115,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio |
| 115 | 115 |
double match_positions(const char *needle, const char *haystack, size_t *positions){
|
| 116 | 116 |
if(!*needle){
|
| 117 | 117 |
return 1.0; |
| 118 |
- }else if(!is_subset(needle, haystack)){
|
|
| 118 |
+ }else if(!has_match(needle, haystack)){
|
|
| 119 | 119 |
return SCORE_MIN; |
| 120 | 120 |
}else if(!strcasecmp(needle, haystack)){
|
| 121 | 121 |
if(positions){
|
| ... | ... |
@@ -30,7 +30,7 @@ void mat_print(int *mat, int n, int m){
|
| 30 | 30 |
#define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 31 | 31 |
typedef int score_t; |
| 32 | 32 |
|
| 33 |
-double calculate_score(const char *needle, const char *haystack){
|
|
| 33 |
+double calculate_score(const char *needle, const char *haystack, size_t *positions){
|
|
| 34 | 34 |
if(!*haystack || !*needle) |
| 35 | 35 |
return SCORE_MIN; |
| 36 | 36 |
|
| ... | ... |
@@ -85,17 +85,48 @@ double calculate_score(const char *needle, const char *haystack){
|
| 85 | 85 |
} |
| 86 | 86 |
} |
| 87 | 87 |
|
| 88 |
+ /* backtrace to find the positions of optimal matching */ |
|
| 89 |
+ if(positions){
|
|
| 90 |
+ for(int i = n-1, j = m-1; i >= 0; i--){
|
|
| 91 |
+ int last = M[i][j]; |
|
| 92 |
+ for(; j >= 0 && M[i][j] == last; j--){
|
|
| 93 |
+ /* |
|
| 94 |
+ * There may be multiple paths which result in |
|
| 95 |
+ * the optimal weight. |
|
| 96 |
+ * |
|
| 97 |
+ * Since we don't exit the loop on the first |
|
| 98 |
+ * match, positions[i] may be assigned to |
|
| 99 |
+ * multiple times. Since we are decrementing i |
|
| 100 |
+ * and j, this favours the optimal path |
|
| 101 |
+ * occurring earlier in the string. |
|
| 102 |
+ */ |
|
| 103 |
+ if(tolower(needle[i]) == tolower(haystack[j])){
|
|
| 104 |
+ positions[i] = j; |
|
| 105 |
+ } |
|
| 106 |
+ } |
|
| 107 |
+ } |
|
| 108 |
+ } |
|
| 109 |
+ |
|
| 88 | 110 |
return (float)(M[n-1][m-1]) / (float)(n * 2 + 1); |
| 89 | 111 |
} |
| 90 | 112 |
|
| 91 |
-double match(const char *needle, const char *haystack){
|
|
| 113 |
+double match_positions(const char *needle, const char *haystack, size_t *positions){
|
|
| 92 | 114 |
if(!*needle){
|
| 93 | 115 |
return 1.0; |
| 94 | 116 |
}else if(!is_subset(needle, haystack)){
|
| 95 | 117 |
return SCORE_MIN; |
| 96 | 118 |
}else if(!strcasecmp(needle, haystack)){
|
| 119 |
+ if(positions){
|
|
| 120 |
+ int n = strlen(needle); |
|
| 121 |
+ for(int i = 0; i < n; i++) |
|
| 122 |
+ positions[i] = i; |
|
| 123 |
+ } |
|
| 97 | 124 |
return 1.0; |
| 98 | 125 |
}else{
|
| 99 |
- return calculate_score(needle, haystack); |
|
| 126 |
+ return calculate_score(needle, haystack, positions); |
|
| 100 | 127 |
} |
| 101 | 128 |
} |
| 129 |
+ |
|
| 130 |
+double match(const char *needle, const char *haystack){
|
|
| 131 |
+ return match_positions(needle, haystack, NULL); |
|
| 132 |
+} |
Only treat the first letter of an ALLCAPS word as the biginning of that
word (it is not CamelCase).
| ... | ... |
@@ -58,11 +58,13 @@ double calculate_score(const char *needle, const char *haystack){
|
| 58 | 58 |
|
| 59 | 59 |
/* Which positions are beginning of words */ |
| 60 | 60 |
int at_bow = 1; |
| 61 |
+ char last_ch = '\0'; |
|
| 61 | 62 |
for(int i = 0; i < m; i++){
|
| 62 | 63 |
char ch = haystack[i]; |
| 63 | 64 |
/* TODO: What about allcaps (ex. README) */ |
| 64 |
- bow[i] = (at_bow && isalnum(ch)) || isupper(ch); |
|
| 65 |
+ bow[i] = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch)); |
|
| 65 | 66 |
at_bow = !isalnum(ch); |
| 67 |
+ last_ch = ch; |
|
| 66 | 68 |
} |
| 67 | 69 |
|
| 68 | 70 |
for(int i = 0; i < n; i++){
|
| ... | ... |
@@ -31,9 +31,21 @@ void mat_print(int *mat, int n, int m){
|
| 31 | 31 |
typedef int score_t; |
| 32 | 32 |
|
| 33 | 33 |
double calculate_score(const char *needle, const char *haystack){
|
| 34 |
+ if(!*haystack || !*needle) |
|
| 35 |
+ return SCORE_MIN; |
|
| 36 |
+ |
|
| 34 | 37 |
int n = strlen(needle); |
| 35 | 38 |
int m = strlen(haystack); |
| 36 | 39 |
|
| 40 |
+ if(m > 1024){
|
|
| 41 |
+ /* |
|
| 42 |
+ * Unreasonably large candidate: return no score |
|
| 43 |
+ * If it is a valid match it will still be returned, it will |
|
| 44 |
+ * just be ranked below any reasonably sized candidates |
|
| 45 |
+ */ |
|
| 46 |
+ return 0; |
|
| 47 |
+ } |
|
| 48 |
+ |
|
| 37 | 49 |
int bow[m]; |
| 38 | 50 |
int D[n][m], M[n][m]; |
| 39 | 51 |
bzero(D, sizeof(D)); |
| ... | ... |
@@ -3,6 +3,8 @@ |
| 3 | 3 |
#include <strings.h> |
| 4 | 4 |
#include <stdio.h> |
| 5 | 5 |
|
| 6 |
+#define SCORE_MIN -1 |
|
| 7 |
+ |
|
| 6 | 8 |
static int is_subset(const char *needle, const char *haystack){
|
| 7 | 9 |
while(*needle){
|
| 8 | 10 |
if(!*haystack) |
| ... | ... |
@@ -76,7 +78,7 @@ double match(const char *needle, const char *haystack){
|
| 76 | 78 |
if(!*needle){
|
| 77 | 79 |
return 1.0; |
| 78 | 80 |
}else if(!is_subset(needle, haystack)){
|
| 79 |
- return -1.0; |
|
| 81 |
+ return SCORE_MIN; |
|
| 80 | 82 |
}else if(!strcasecmp(needle, haystack)){
|
| 81 | 83 |
return 1.0; |
| 82 | 84 |
}else{
|
| ... | ... |
@@ -1,5 +1,7 @@ |
| 1 | 1 |
#include <ctype.h> |
| 2 | 2 |
#include <string.h> |
| 3 |
+#include <strings.h> |
|
| 4 |
+#include <stdio.h> |
|
| 3 | 5 |
|
| 4 | 6 |
static int is_subset(const char *needle, const char *haystack){
|
| 5 | 7 |
while(*needle){
|
| ... | ... |
@@ -11,14 +13,73 @@ static int is_subset(const char *needle, const char *haystack){
|
| 11 | 13 |
return 1; |
| 12 | 14 |
} |
| 13 | 15 |
|
| 16 |
+/* print one of the internal matrices */ |
|
| 17 |
+void mat_print(int *mat, int n, int m){
|
|
| 18 |
+ int i, j; |
|
| 19 |
+ for(i = 0; i < n; i++){
|
|
| 20 |
+ for(j = 0; j < m; j++){
|
|
| 21 |
+ fprintf(stderr, " %3zd", mat[i*m + j]); |
|
| 22 |
+ } |
|
| 23 |
+ fprintf(stderr, "\n"); |
|
| 24 |
+ } |
|
| 25 |
+ fprintf(stderr, "\n\n"); |
|
| 26 |
+} |
|
| 27 |
+ |
|
| 28 |
+#define max(a, b) (((a) > (b)) ? (a) : (b)) |
|
| 29 |
+typedef int score_t; |
|
| 30 |
+ |
|
| 31 |
+double calculate_score(const char *needle, const char *haystack){
|
|
| 32 |
+ int n = strlen(needle); |
|
| 33 |
+ int m = strlen(haystack); |
|
| 34 |
+ |
|
| 35 |
+ int bow[m]; |
|
| 36 |
+ int D[n][m], M[n][m]; |
|
| 37 |
+ bzero(D, sizeof(D)); |
|
| 38 |
+ bzero(M, sizeof(M)); |
|
| 39 |
+ |
|
| 40 |
+ /* |
|
| 41 |
+ * D[][] Stores the best score for this position ending with a match. |
|
| 42 |
+ * M[][] Stores the best possible score at this position. |
|
| 43 |
+ */ |
|
| 44 |
+ |
|
| 45 |
+ /* Which positions are beginning of words */ |
|
| 46 |
+ int at_bow = 1; |
|
| 47 |
+ for(int i = 0; i < m; i++){
|
|
| 48 |
+ char ch = haystack[i]; |
|
| 49 |
+ /* TODO: What about allcaps (ex. README) */ |
|
| 50 |
+ bow[i] = (at_bow && isalnum(ch)) || isupper(ch); |
|
| 51 |
+ at_bow = !isalnum(ch); |
|
| 52 |
+ } |
|
| 53 |
+ |
|
| 54 |
+ for(int i = 0; i < n; i++){
|
|
| 55 |
+ for(int j = 0; j < m; j++){
|
|
| 56 |
+ int match = tolower(needle[i]) == tolower(haystack[j]); |
|
| 57 |
+ if(match){
|
|
| 58 |
+ score_t score = 0; |
|
| 59 |
+ if(i && j) |
|
| 60 |
+ score = M[i-1][j-1]; |
|
| 61 |
+ if(bow[j]) |
|
| 62 |
+ score += 2; |
|
| 63 |
+ else if(i && j && D[i-1][j-1]) |
|
| 64 |
+ score = max(score, 1 + D[i-1][j-1]); |
|
| 65 |
+ M[i][j] = D[i][j] = score; |
|
| 66 |
+ } |
|
| 67 |
+ if(j) |
|
| 68 |
+ M[i][j] = max(M[i][j], M[i][j-1]); |
|
| 69 |
+ } |
|
| 70 |
+ } |
|
| 71 |
+ |
|
| 72 |
+ return (float)(M[n-1][m-1]) / (float)(n * 2 + 1); |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 14 | 75 |
double match(const char *needle, const char *haystack){
|
| 15 | 76 |
if(!*needle){
|
| 16 | 77 |
return 1.0; |
| 17 | 78 |
}else if(!is_subset(needle, haystack)){
|
| 18 |
- return 0.0; |
|
| 79 |
+ return -1.0; |
|
| 19 | 80 |
}else if(!strcasecmp(needle, haystack)){
|
| 20 | 81 |
return 1.0; |
| 21 | 82 |
}else{
|
| 22 |
- return 0.9; |
|
| 83 |
+ return calculate_score(needle, haystack); |
|
| 23 | 84 |
} |
| 24 | 85 |
} |
| ... | ... |
@@ -1,4 +1,5 @@ |
| 1 | 1 |
#include <ctype.h> |
| 2 |
+#include <string.h> |
|
| 2 | 3 |
|
| 3 | 4 |
static int is_subset(const char *needle, const char *haystack){
|
| 4 | 5 |
while(*needle){
|
| ... | ... |
@@ -11,8 +12,13 @@ static int is_subset(const char *needle, const char *haystack){
|
| 11 | 12 |
} |
| 12 | 13 |
|
| 13 | 14 |
double match(const char *needle, const char *haystack){
|
| 14 |
- if(!is_subset(needle, haystack)){
|
|
| 15 |
+ if(!*needle){
|
|
| 16 |
+ return 1.0; |
|
| 17 |
+ }else if(!is_subset(needle, haystack)){
|
|
| 15 | 18 |
return 0.0; |
| 19 |
+ }else if(!strcasecmp(needle, haystack)){
|
|
| 20 |
+ return 1.0; |
|
| 21 |
+ }else{
|
|
| 22 |
+ return 0.9; |
|
| 16 | 23 |
} |
| 17 |
- return 1.0; |
|
| 18 | 24 |
} |
| ... | ... |
@@ -1,12 +1,18 @@ |
| 1 | 1 |
#include <ctype.h> |
| 2 | 2 |
|
| 3 |
-double match(const char *needle, const char *haystack){
|
|
| 3 |
+static int is_subset(const char *needle, const char *haystack){
|
|
| 4 | 4 |
while(*needle){
|
| 5 | 5 |
if(!*haystack) |
| 6 |
- return 0.0; |
|
| 7 |
- while(*haystack && tolower(*needle) == tolower(*haystack++)){
|
|
| 6 |
+ return 0; |
|
| 7 |
+ while(*haystack && tolower(*needle) == tolower(*haystack++)) |
|
| 8 | 8 |
needle++; |
| 9 |
- } |
|
| 9 |
+ } |
|
| 10 |
+ return 1; |
|
| 11 |
+} |
|
| 12 |
+ |
|
| 13 |
+double match(const char *needle, const char *haystack){
|
|
| 14 |
+ if(!is_subset(needle, haystack)){
|
|
| 15 |
+ return 0.0; |
|
| 10 | 16 |
} |
| 11 | 17 |
return 1.0; |
| 12 | 18 |
} |
| 1 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,12 @@ |
| 1 |
+#include <ctype.h> |
|
| 2 |
+ |
|
| 3 |
+double match(const char *needle, const char *haystack){
|
|
| 4 |
+ while(*needle){
|
|
| 5 |
+ if(!*haystack) |
|
| 6 |
+ return 0.0; |
|
| 7 |
+ while(*haystack && tolower(*needle) == tolower(*haystack++)){
|
|
| 8 |
+ needle++; |
|
| 9 |
+ } |
|
| 10 |
+ } |
|
| 11 |
+ return 1.0; |
|
| 12 |
+} |