Browse code

Move sources into src directory

John Hawthorn authored on 21/05/2016 21:56:03
Showing 1 changed files
1 1
deleted file mode 100644
... ...
@@ -1,183 +0,0 @@
1
-#include <ctype.h>
2
-#include <string.h>
3
-#include <strings.h>
4
-#include <stdio.h>
5
-#include <float.h>
6
-#include <math.h>
7
-
8
-#include "match.h"
9
-
10
-#include "config.h"
11
-
12
-char *strcasechr(const char *s, char c) {
13
-	const char accept[3] = {c, toupper(c), 0};
14
-	return strpbrk(s, accept);
15
-}
16
-
17
-int has_match(const char *needle, const char *haystack) {
18
-	while (*needle) {
19
-		char nch = *needle++;
20
-
21
-		if (!(haystack = strcasechr(haystack, nch))) {
22
-			return 0;
23
-		}
24
-		haystack++;
25
-	}
26
-	return 1;
27
-}
28
-
29
-#define max(a, b) (((a) > (b)) ? (a) : (b))
30
-
31
-#ifdef DEBUG_VERBOSE
32
-/* print one of the internal matrices */
33
-void mat_print(score_t *mat, char name, const char *needle, const char *haystack) {
34
-	int n = strlen(needle);
35
-	int m = strlen(haystack);
36
-	int i, j;
37
-	fprintf(stderr, "%c   ", name);
38
-	for (j = 0; j < m; j++) {
39
-		fprintf(stderr, "     %c", haystack[j]);
40
-	}
41
-	fprintf(stderr, "\n");
42
-	for (i = 0; i < n; i++) {
43
-		fprintf(stderr, " %c |", needle[i]);
44
-		for (j = 0; j < m; j++) {
45
-			score_t val = mat[i * m + j];
46
-			if (val == SCORE_MIN) {
47
-				fprintf(stderr, "    -\u221E");
48
-			} else {
49
-				fprintf(stderr, " %.3f", val);
50
-			}
51
-		}
52
-		fprintf(stderr, "\n");
53
-	}
54
-	fprintf(stderr, "\n\n");
55
-}
56
-#endif
57
-
58
-score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
59
-	if (!*haystack || !*needle)
60
-		return SCORE_MIN;
61
-
62
-	int n = strlen(needle);
63
-	int m = strlen(haystack);
64
-
65
-	if (m > 1024) {
66
-		/*
67
-		 * Unreasonably large candidate: return no score
68
-		 * If it is a valid match it will still be returned, it will
69
-		 * just be ranked below any reasonably sized candidates
70
-		 */
71
-		return SCORE_MIN;
72
-	}
73
-
74
-	score_t match_bonus[m];
75
-	score_t D[n][m], M[n][m];
76
-
77
-	/*
78
-	 * D[][] Stores the best score for this position ending with a match.
79
-	 * M[][] Stores the best possible score at this position.
80
-	 */
81
-
82
-	/* Which positions are beginning of words */
83
-	char last_ch = '\0';
84
-	for (int i = 0; i < m; i++) {
85
-		char ch = haystack[i];
86
-
87
-		score_t score = 0;
88
-		if (isalnum(ch)) {
89
-			if (!last_ch || last_ch == '/') {
90
-				score = SCORE_MATCH_SLASH;
91
-			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' ||
92
-				   (last_ch >= '0' && last_ch <= '9')) {
93
-				score = SCORE_MATCH_WORD;
94
-			} else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
95
-				/* CamelCase */
96
-				score = SCORE_MATCH_CAPITAL;
97
-			} else if (last_ch == '.') {
98
-				score = SCORE_MATCH_DOT;
99
-			}
100
-		}
101
-
102
-		match_bonus[i] = score;
103
-		last_ch = ch;
104
-	}
105
-
106
-	for (int i = 0; i < n; i++) {
107
-		score_t prev_score = SCORE_MIN;
108
-		score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
109
-		for (int j = 0; j < m; j++) {
110
-			score_t score = SCORE_MIN;
111
-			if (tolower(needle[i]) == tolower(haystack[j])) {
112
-				if (!i) {
113
-					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
114
-				} else if (j) { /* i > 0 && j > 0*/
115
-					score = max(
116
-					    M[i - 1][j - 1] + match_bonus[j],
117
-
118
-					    /* consecutive match, doesn't stack with match_bonus */
119
-					    D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE);
120
-				}
121
-			}
122
-			D[i][j] = score;
123
-			M[i][j] = prev_score = max(score, prev_score + gap_score);
124
-		}
125
-	}
126
-
127
-#ifdef DEBUG_VERBOSE
128
-	fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack);
129
-	mat_print(&D[0][0], 'D', needle, haystack);
130
-	mat_print(&M[0][0], 'M', needle, haystack);
131
-	fprintf(stderr, "\n");
132
-#endif
133
-
134
-	/* backtrace to find the positions of optimal matching */
135
-	if (positions) {
136
-		int match_required = 0;
137
-		for (int i = n - 1, j = m - 1; i >= 0; i--) {
138
-			for (; j >= 0; j--) {
139
-				/*
140
-				 * There may be multiple paths which result in
141
-				 * the optimal weight.
142
-				 *
143
-				 * For simplicity, we will pick the first one
144
-				 * we encounter, the latest in the candidate
145
-				 * string.
146
-				 */
147
-				if (D[i][j] != SCORE_MIN &&
148
-				    (match_required || D[i][j] == M[i][j])) {
149
-					/* If this score was determined using
150
-					 * SCORE_MATCH_CONSECUTIVE, the
151
-					 * previous character MUST be a match
152
-					 */
153
-					match_required =
154
-					    i && j &&
155
-					    M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
156
-					positions[i] = j--;
157
-					break;
158
-				}
159
-			}
160
-		}
161
-	}
162
-
163
-	return M[n - 1][m - 1];
164
-}
165
-
166
-score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
167
-	if (!*needle) {
168
-		return SCORE_MAX;
169
-	} else if (!strcasecmp(needle, haystack)) {
170
-		if (positions) {
171
-			int n = strlen(needle);
172
-			for (int i = 0; i < n; i++)
173
-				positions[i] = i;
174
-		}
175
-		return SCORE_MAX;
176
-	} else {
177
-		return calculate_score(needle, haystack, positions);
178
-	}
179
-}
180
-
181
-score_t match(const char *needle, const char *haystack) {
182
-	return match_positions(needle, haystack, NULL);
183
-}
Browse code

Improve debugging print

John Hawthorn authored on 24/04/2016 23:35:29
Showing 1 changed files
... ...
@@ -30,13 +30,13 @@ int has_match(const char *needle, const char *haystack) {
30 30
 
31 31
 #ifdef DEBUG_VERBOSE
32 32
 /* print one of the internal matrices */
33
-void mat_print(score_t *mat, const char *needle, const char *haystack) {
33
+void mat_print(score_t *mat, char name, const char *needle, const char *haystack) {
34 34
 	int n = strlen(needle);
35 35
 	int m = strlen(haystack);
36 36
 	int i, j;
37
-	fprintf(stderr, "    ");
37
+	fprintf(stderr, "%c   ", name);
38 38
 	for (j = 0; j < m; j++) {
39
-		fprintf(stderr, "    %c", haystack[j]);
39
+		fprintf(stderr, "     %c", haystack[j]);
40 40
 	}
41 41
 	fprintf(stderr, "\n");
42 42
 	for (i = 0; i < n; i++) {
... ...
@@ -44,9 +44,9 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
44 44
 		for (j = 0; j < m; j++) {
45 45
 			score_t val = mat[i * m + j];
46 46
 			if (val == SCORE_MIN) {
47
-				fprintf(stderr, "   -\u221E");
47
+				fprintf(stderr, "    -\u221E");
48 48
 			} else {
49
-				fprintf(stderr, " % 4g", val);
49
+				fprintf(stderr, " %.3f", val);
50 50
 			}
51 51
 		}
52 52
 		fprintf(stderr, "\n");
... ...
@@ -111,7 +111,7 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
111 111
 			if (tolower(needle[i]) == tolower(haystack[j])) {
112 112
 				if (!i) {
113 113
 					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
114
-				} else if (j) {
114
+				} else if (j) { /* i > 0 && j > 0*/
115 115
 					score = max(
116 116
 					    M[i - 1][j - 1] + match_bonus[j],
117 117
 
... ...
@@ -126,8 +126,8 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
126 126
 
127 127
 #ifdef DEBUG_VERBOSE
128 128
 	fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack);
129
-	mat_print(&D[0][0], needle, haystack);
130
-	mat_print(&M[0][0], needle, haystack);
129
+	mat_print(&D[0][0], 'D', needle, haystack);
130
+	mat_print(&M[0][0], 'M', needle, haystack);
131 131
 	fprintf(stderr, "\n");
132 132
 #endif
133 133
 
Browse code

Add ALGORITHM.md

John Hawthorn authored on 19/12/2015 23:01:25 • John Hawthorn committed on 20/04/2016 00:20:50
Showing 1 changed files
... ...
@@ -36,7 +36,7 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
36 36
 	int i, j;
37 37
 	fprintf(stderr, "    ");
38 38
 	for (j = 0; j < m; j++) {
39
-		fprintf(stderr, "     %c", haystack[j]);
39
+		fprintf(stderr, "    %c", haystack[j]);
40 40
 	}
41 41
 	fprintf(stderr, "\n");
42 42
 	for (i = 0; i < n; i++) {
... ...
@@ -44,9 +44,9 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
44 44
 		for (j = 0; j < m; j++) {
45 45
 			score_t val = mat[i * m + j];
46 46
 			if (val == SCORE_MIN) {
47
-				fprintf(stderr, "  -inf");
47
+				fprintf(stderr, "   -\u221E");
48 48
 			} else {
49
-				fprintf(stderr, " % .2f", val);
49
+				fprintf(stderr, " % 4g", val);
50 50
 			}
51 51
 		}
52 52
 		fprintf(stderr, "\n");
Browse code

Add DEBUG_VERBOSE flag

John Hawthorn authored on 08/12/2015 02:20:00
Showing 1 changed files
... ...
@@ -28,6 +28,7 @@ int has_match(const char *needle, const char *haystack) {
28 28
 
29 29
 #define max(a, b) (((a) > (b)) ? (a) : (b))
30 30
 
31
+#ifdef DEBUG_VERBOSE
31 32
 /* print one of the internal matrices */
32 33
 void mat_print(score_t *mat, const char *needle, const char *haystack) {
33 34
 	int n = strlen(needle);
... ...
@@ -52,6 +53,7 @@ void mat_print(score_t *mat, const char *needle, const char *haystack) {
52 53
 	}
53 54
 	fprintf(stderr, "\n\n");
54 55
 }
56
+#endif
55 57
 
56 58
 score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
57 59
 	if (!*haystack || !*needle)
... ...
@@ -122,7 +124,7 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
122 124
 		}
123 125
 	}
124 126
 
125
-#if 0
127
+#ifdef DEBUG_VERBOSE
126 128
 	fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack);
127 129
 	mat_print(&D[0][0], needle, haystack);
128 130
 	mat_print(&M[0][0], needle, haystack);
Browse code

Apply clang-format to all files

Apologies that this uses my preferred formatting style: mostly the same
as Linux, but without a break between function and brace. Adds spaces in
a few places they weren't before.

John Hawthorn authored on 07/11/2015 05:45:02
Showing 1 changed files
... ...
@@ -9,16 +9,16 @@
9 9
 
10 10
 #include "config.h"
11 11
 
12
-char *strcasechr(const char *s, char c){
12
+char *strcasechr(const char *s, char c) {
13 13
 	const char accept[3] = {c, toupper(c), 0};
14 14
 	return strpbrk(s, accept);
15 15
 }
16 16
 
17
-int has_match(const char *needle, const char *haystack){
18
-	while(*needle){
17
+int has_match(const char *needle, const char *haystack) {
18
+	while (*needle) {
19 19
 		char nch = *needle++;
20 20
 
21
-		if(!(haystack = strcasechr(haystack, nch))){
21
+		if (!(haystack = strcasechr(haystack, nch))) {
22 22
 			return 0;
23 23
 		}
24 24
 		haystack++;
... ...
@@ -29,22 +29,22 @@ int has_match(const char *needle, const char *haystack){
29 29
 #define max(a, b) (((a) > (b)) ? (a) : (b))
30 30
 
31 31
 /* print one of the internal matrices */
32
-void mat_print(score_t *mat, const char *needle, const char *haystack){
32
+void mat_print(score_t *mat, const char *needle, const char *haystack) {
33 33
 	int n = strlen(needle);
34 34
 	int m = strlen(haystack);
35 35
 	int i, j;
36 36
 	fprintf(stderr, "    ");
37
-	for(j = 0; j < m; j++){
37
+	for (j = 0; j < m; j++) {
38 38
 		fprintf(stderr, "     %c", haystack[j]);
39 39
 	}
40 40
 	fprintf(stderr, "\n");
41
-	for(i = 0; i < n; i++){
41
+	for (i = 0; i < n; i++) {
42 42
 		fprintf(stderr, " %c |", needle[i]);
43
-		for(j = 0; j < m; j++){
44
-			score_t val = mat[i*m + j];
45
-			if(val == SCORE_MIN){
43
+		for (j = 0; j < m; j++) {
44
+			score_t val = mat[i * m + j];
45
+			if (val == SCORE_MIN) {
46 46
 				fprintf(stderr, "  -inf");
47
-			}else{
47
+			} else {
48 48
 				fprintf(stderr, " % .2f", val);
49 49
 			}
50 50
 		}
... ...
@@ -53,14 +53,14 @@ void mat_print(score_t *mat, const char *needle, const char *haystack){
53 53
 	fprintf(stderr, "\n\n");
54 54
 }
55 55
 
56
-score_t calculate_score(const char *needle, const char *haystack, size_t *positions){
57
-	if(!*haystack || !*needle)
56
+score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
57
+	if (!*haystack || !*needle)
58 58
 		return SCORE_MIN;
59 59
 
60 60
 	int n = strlen(needle);
61 61
 	int m = strlen(haystack);
62 62
 
63
-	if(m > 1024){
63
+	if (m > 1024) {
64 64
 		/*
65 65
 		 * Unreasonably large candidate: return no score
66 66
 		 * If it is a valid match it will still be returned, it will
... ...
@@ -79,23 +79,20 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
79 79
 
80 80
 	/* Which positions are beginning of words */
81 81
 	char last_ch = '\0';
82
-	for(int i = 0; i < m; i++){
82
+	for (int i = 0; i < m; i++) {
83 83
 		char ch = haystack[i];
84 84
 
85 85
 		score_t score = 0;
86
-		if(isalnum(ch)){
87
-			if(!last_ch || last_ch == '/'){
86
+		if (isalnum(ch)) {
87
+			if (!last_ch || last_ch == '/') {
88 88
 				score = SCORE_MATCH_SLASH;
89
-			}else if(last_ch == '-' ||
90
-					last_ch == '_' ||
91
-					last_ch == ' ' ||
92
-					(last_ch >= '0' && last_ch <= '9')){
89
+			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' ||
90
+				   (last_ch >= '0' && last_ch <= '9')) {
93 91
 				score = SCORE_MATCH_WORD;
94
-			}else if(last_ch >= 'a' && last_ch <= 'z' &&
95
-					ch >= 'A' && ch <= 'Z'){
92
+			} else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
96 93
 				/* CamelCase */
97 94
 				score = SCORE_MATCH_CAPITAL;
98
-			}else if(last_ch == '.'){
95
+			} else if (last_ch == '.') {
99 96
 				score = SCORE_MATCH_DOT;
100 97
 			}
101 98
 		}
... ...
@@ -104,21 +101,20 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
104 101
 		last_ch = ch;
105 102
 	}
106 103
 
107
-	for(int i = 0; i < n; i++){
104
+	for (int i = 0; i < n; i++) {
108 105
 		score_t prev_score = SCORE_MIN;
109
-		score_t gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
110
-		for(int j = 0; j < m; j++){
106
+		score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
107
+		for (int j = 0; j < m; j++) {
111 108
 			score_t score = SCORE_MIN;
112
-			if(tolower(needle[i]) == tolower(haystack[j])){
113
-				if(!i){
109
+			if (tolower(needle[i]) == tolower(haystack[j])) {
110
+				if (!i) {
114 111
 					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
115
-				}else if(j){
112
+				} else if (j) {
116 113
 					score = max(
117
-						M[i-1][j-1] + match_bonus[j],
114
+					    M[i - 1][j - 1] + match_bonus[j],
118 115
 
119
-						/* consecutive match, doesn't stack with match_bonus */
120
-						D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE
121
-						);
116
+					    /* consecutive match, doesn't stack with match_bonus */
117
+					    D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE);
122 118
 				}
123 119
 			}
124 120
 			D[i][j] = score;
... ...
@@ -134,10 +130,10 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
134 130
 #endif
135 131
 
136 132
 	/* backtrace to find the positions of optimal matching */
137
-	if(positions){
133
+	if (positions) {
138 134
 		int match_required = 0;
139
-		for(int i = n-1, j = m-1; i >= 0; i--){
140
-			for(; j >= 0; j--){
135
+		for (int i = n - 1, j = m - 1; i >= 0; i--) {
136
+			for (; j >= 0; j--) {
141 137
 				/*
142 138
 				 * There may be multiple paths which result in
143 139
 				 * the optimal weight.
... ...
@@ -146,12 +142,15 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
146 142
 				 * we encounter, the latest in the candidate
147 143
 				 * string.
148 144
 				 */
149
-				if(D[i][j] != SCORE_MIN && (match_required || D[i][j] == M[i][j])){
145
+				if (D[i][j] != SCORE_MIN &&
146
+				    (match_required || D[i][j] == M[i][j])) {
150 147
 					/* If this score was determined using
151 148
 					 * SCORE_MATCH_CONSECUTIVE, the
152 149
 					 * previous character MUST be a match
153 150
 					 */
154
-					match_required = i && j && M[i][j] == D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE;
151
+					match_required =
152
+					    i && j &&
153
+					    M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
155 154
 					positions[i] = j--;
156 155
 					break;
157 156
 				}
... ...
@@ -159,24 +158,24 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
159 158
 		}
160 159
 	}
161 160
 
162
-	return M[n-1][m-1];
161
+	return M[n - 1][m - 1];
163 162
 }
164 163
 
165
-score_t match_positions(const char *needle, const char *haystack, size_t *positions){
166
-	if(!*needle){
164
+score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
165
+	if (!*needle) {
167 166
 		return SCORE_MAX;
168
-	}else if(!strcasecmp(needle, haystack)){
169
-		if(positions){
167
+	} else if (!strcasecmp(needle, haystack)) {
168
+		if (positions) {
170 169
 			int n = strlen(needle);
171
-			for(int i = 0; i < n; i++)
170
+			for (int i = 0; i < n; i++)
172 171
 				positions[i] = i;
173 172
 		}
174 173
 		return SCORE_MAX;
175
-	}else{
174
+	} else {
176 175
 		return calculate_score(needle, haystack, positions);
177 176
 	}
178 177
 }
179 178
 
180
-score_t match(const char *needle, const char *haystack){
179
+score_t match(const char *needle, const char *haystack) {
181 180
 	return match_positions(needle, haystack, NULL);
182 181
 }
Browse code

Rewrite has_match using strpbrk

John Hawthorn authored on 22/09/2014 06:13:09
Showing 1 changed files
... ...
@@ -9,16 +9,19 @@
9 9
 
10 10
 #include "config.h"
11 11
 
12
+char *strcasechr(const char *s, char c){
13
+	const char accept[3] = {c, toupper(c), 0};
14
+	return strpbrk(s, accept);
15
+}
16
+
12 17
 int has_match(const char *needle, const char *haystack){
13 18
 	while(*needle){
14
-		char nch = tolower(*needle++);
15
-		for(;;){
16
-			char ch = *haystack++;
17
-			if(!ch)
18
-				return 0;
19
-			else if(nch == tolower(ch))
20
-				break;
19
+		char nch = *needle++;
20
+
21
+		if(!(haystack = strcasechr(haystack, nch))){
22
+			return 0;
21 23
 		}
24
+		haystack++;
22 25
 	}
23 26
 	return 1;
24 27
 }
Browse code

Prefer score_t over double

John Hawthorn authored on 22/09/2014 07:30:29
Showing 1 changed files
... ...
@@ -50,7 +50,7 @@ void mat_print(score_t *mat, const char *needle, const char *haystack){
50 50
 	fprintf(stderr, "\n\n");
51 51
 }
52 52
 
53
-double calculate_score(const char *needle, const char *haystack, size_t *positions){
53
+score_t calculate_score(const char *needle, const char *haystack, size_t *positions){
54 54
 	if(!*haystack || !*needle)
55 55
 		return SCORE_MIN;
56 56
 
... ...
@@ -102,8 +102,8 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
102 102
 	}
103 103
 
104 104
 	for(int i = 0; i < n; i++){
105
-		double prev_score = SCORE_MIN;
106
-		double gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
105
+		score_t prev_score = SCORE_MIN;
106
+		score_t gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
107 107
 		for(int j = 0; j < m; j++){
108 108
 			score_t score = SCORE_MIN;
109 109
 			if(tolower(needle[i]) == tolower(haystack[j])){
... ...
@@ -159,7 +159,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
159 159
 	return M[n-1][m-1];
160 160
 }
161 161
 
162
-double match_positions(const char *needle, const char *haystack, size_t *positions){
162
+score_t match_positions(const char *needle, const char *haystack, size_t *positions){
163 163
 	if(!*needle){
164 164
 		return SCORE_MAX;
165 165
 	}else if(!strcasecmp(needle, haystack)){
... ...
@@ -174,6 +174,6 @@ double match_positions(const char *needle, const char *haystack, size_t *positio
174 174
 	}
175 175
 }
176 176
 
177
-double match(const char *needle, const char *haystack){
177
+score_t match(const char *needle, const char *haystack){
178 178
 	return match_positions(needle, haystack, NULL);
179 179
 }
Browse code

Move some score definitions into header

John Hawthorn authored on 21/09/2014 21:12:47
Showing 1 changed files
... ...
@@ -24,9 +24,6 @@ int has_match(const char *needle, const char *haystack){
24 24
 }
25 25
 
26 26
 #define max(a, b) (((a) > (b)) ? (a) : (b))
27
-typedef double score_t;
28
-#define SCORE_MAX INFINITY
29
-#define SCORE_MIN -INFINITY
30 27
 
31 28
 /* print one of the internal matrices */
32 29
 void mat_print(score_t *mat, const char *needle, const char *haystack){
Browse code

Move scoring constants into config.h

John Hawthorn authored on 19/09/2014 00:28:21
Showing 1 changed files
... ...
@@ -7,6 +7,8 @@
7 7
 
8 8
 #include "match.h"
9 9
 
10
+#include "config.h"
11
+
10 12
 int has_match(const char *needle, const char *haystack){
11 13
 	while(*needle){
12 14
 		char nch = tolower(*needle++);
... ...
@@ -75,15 +77,6 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
75 77
 	 * M[][] Stores the best possible score at this position.
76 78
 	 */
77 79
 
78
-#define SCORE_GAP_LEADING      -0.005
79
-#define SCORE_GAP_TRAILING     -0.005
80
-#define SCORE_GAP_INNER        -0.01
81
-#define SCORE_MATCH_CONSECUTIVE 1.0
82
-#define SCORE_MATCH_SLASH       0.9
83
-#define SCORE_MATCH_WORD        0.8
84
-#define SCORE_MATCH_CAPITAL     0.7
85
-#define SCORE_MATCH_DOT         0.6
86
-
87 80
 	/* Which positions are beginning of words */
88 81
 	char last_ch = '\0';
89 82
 	for(int i = 0; i < m; i++){
Browse code

Cleanup headers

John Hawthorn authored on 15/09/2014 02:42:43
Showing 1 changed files
... ...
@@ -5,7 +5,7 @@
5 5
 #include <float.h>
6 6
 #include <math.h>
7 7
 
8
-#include "fzy.h"
8
+#include "match.h"
9 9
 
10 10
 int has_match(const char *needle, const char *haystack){
11 11
 	while(*needle){
Browse code

Remove unecessary has_match test

John Hawthorn authored on 14/09/2014 06:43:40
Showing 1 changed files
... ...
@@ -172,8 +172,6 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
172 172
 double match_positions(const char *needle, const char *haystack, size_t *positions){
173 173
 	if(!*needle){
174 174
 		return SCORE_MAX;
175
-	}else if(!has_match(needle, haystack)){
176
-		return SCORE_MIN;
177 175
 	}else if(!strcasecmp(needle, haystack)){
178 176
 		if(positions){
179 177
 			int n = strlen(needle);
Browse code

Improve performance of has_match

John Hawthorn authored on 14/09/2014 05:57:26
Showing 1 changed files
... ...
@@ -9,10 +9,14 @@
9 9
 
10 10
 int has_match(const char *needle, const char *haystack){
11 11
 	while(*needle){
12
-		if(!*haystack)
13
-			return 0;
14
-		while(*haystack && tolower(*needle) == tolower(*haystack++))
15
-			needle++;
12
+		char nch = tolower(*needle++);
13
+		for(;;){
14
+			char ch = *haystack++;
15
+			if(!ch)
16
+				return 0;
17
+			else if(nch == tolower(ch))
18
+				break;
19
+		}
16 20
 	}
17 21
 	return 1;
18 22
 }
Browse code

bzero of matrices is unnecessary

John Hawthorn authored on 07/09/2014 01:55:02
Showing 1 changed files
... ...
@@ -65,8 +65,6 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
65 65
 
66 66
 	score_t match_bonus[m];
67 67
 	score_t D[n][m], M[n][m];
68
-	bzero(D, sizeof(D));
69
-	bzero(M, sizeof(M));
70 68
 
71 69
 	/*
72 70
 	 * D[][] Stores the best score for this position ending with a match.
Browse code

Adjust and reformat match inner loop

John Hawthorn authored on 07/09/2014 01:33:55
Showing 1 changed files
... ...
@@ -118,16 +118,16 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
118 118
 				if(!i){
119 119
 					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
120 120
 				}else if(j){
121
-					score = max(score, M[i-1][j-1] + match_bonus[j]);
121
+					score = max(
122
+						M[i-1][j-1] + match_bonus[j],
122 123
 
123
-					/* consecutive match, doesn't stack with match_bonus */
124
-					score = max(score, D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE);
124
+						/* consecutive match, doesn't stack with match_bonus */
125
+						D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE
126
+						);
125 127
 				}
126 128
 			}
127 129
 			D[i][j] = score;
128
-			score = max(score, prev_score + gap_score);
129
-			M[i][j] = score;
130
-			prev_score = score;
130
+			M[i][j] = prev_score = max(score, prev_score + gap_score);
131 131
 		}
132 132
 	}
133 133
 
Browse code

Optimize inner loop

John Hawthorn authored on 07/09/2014 01:07:01
Showing 1 changed files
... ...
@@ -111,6 +111,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
111 111
 
112 112
 	for(int i = 0; i < n; i++){
113 113
 		double prev_score = SCORE_MIN;
114
+		double gap_score = i == n-1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
114 115
 		for(int j = 0; j < m; j++){
115 116
 			score_t score = SCORE_MIN;
116 117
 			if(tolower(needle[i]) == tolower(haystack[j])){
... ...
@@ -124,13 +125,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
124 125
 				}
125 126
 			}
126 127
 			D[i][j] = score;
127
-			if(j){
128
-				if(i == n-1){
129
-					score = max(score, prev_score + SCORE_GAP_TRAILING);
130
-				}else{
131
-					score = max(score, prev_score + SCORE_GAP_INNER);
132
-				}
133
-			}
128
+			score = max(score, prev_score + gap_score);
134 129
 			M[i][j] = score;
135 130
 			prev_score = score;
136 131
 		}
Browse code

Avoid unnecessary matrix access

John Hawthorn authored on 07/09/2014 01:04:10
Showing 1 changed files
... ...
@@ -110,6 +110,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
110 110
 	}
111 111
 
112 112
 	for(int i = 0; i < n; i++){
113
+		double prev_score = SCORE_MIN;
113 114
 		for(int j = 0; j < m; j++){
114 115
 			score_t score = SCORE_MIN;
115 116
 			if(tolower(needle[i]) == tolower(haystack[j])){
... ...
@@ -125,12 +126,13 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
125 126
 			D[i][j] = score;
126 127
 			if(j){
127 128
 				if(i == n-1){
128
-					score = max(score, M[i][j-1] + SCORE_GAP_TRAILING);
129
+					score = max(score, prev_score + SCORE_GAP_TRAILING);
129 130
 				}else{
130
-					score = max(score, M[i][j-1] + SCORE_GAP_INNER);
131
+					score = max(score, prev_score + SCORE_GAP_INNER);
131 132
 				}
132 133
 			}
133 134
 			M[i][j] = score;
135
+			prev_score = score;
134 136
 		}
135 137
 	}
136 138
 
Browse code

Rearrance calculate_score inner loop for clarity

John Hawthorn authored on 07/09/2014 00:29:06
Showing 1 changed files
... ...
@@ -112,19 +112,17 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
112 112
 	for(int i = 0; i < n; i++){
113 113
 		for(int j = 0; j < m; j++){
114 114
 			score_t score = SCORE_MIN;
115
-			int match = tolower(needle[i]) == tolower(haystack[j]);
116
-			D[i][j] = SCORE_MIN;
117
-			if(match){
118
-				if(i && j){
115
+			if(tolower(needle[i]) == tolower(haystack[j])){
116
+				if(!i){
117
+					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
118
+				}else if(j){
119 119
 					score = max(score, M[i-1][j-1] + match_bonus[j]);
120 120
 
121 121
 					/* consecutive match, doesn't stack with match_bonus */
122 122
 					score = max(score, D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE);
123
-				}else if(!i){
124
-					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
125 123
 				}
126
-				D[i][j] = score;
127 124
 			}
125
+			D[i][j] = score;
128 126
 			if(j){
129 127
 				if(i == n-1){
130 128
 					score = max(score, M[i][j-1] + SCORE_GAP_TRAILING);
Browse code

scoring: Prefer consecutive matches

John Hawthorn authored on 31/08/2014 02:26:22
Showing 1 changed files
... ...
@@ -77,10 +77,10 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
77 77
 #define SCORE_GAP_TRAILING     -0.005
78 78
 #define SCORE_GAP_INNER        -0.01
79 79
 #define SCORE_MATCH_CONSECUTIVE 1.0
80
-#define SCORE_MATCH_SLASH       1.5
81
-#define SCORE_MATCH_WORD        1.2
82
-#define SCORE_MATCH_CAPITAL     1.1
83
-#define SCORE_MATCH_DOT         0.8
80
+#define SCORE_MATCH_SLASH       0.9
81
+#define SCORE_MATCH_WORD        0.8
82
+#define SCORE_MATCH_CAPITAL     0.7
83
+#define SCORE_MATCH_DOT         0.6
84 84
 
85 85
 	/* Which positions are beginning of words */
86 86
 	char last_ch = '\0';
Browse code

Fix backtrace regarding SCORE_MATCH_CONSECUTIVE

John Hawthorn authored on 31/08/2014 02:35:33
Showing 1 changed files
... ...
@@ -145,6 +145,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
145 145
 
146 146
 	/* backtrace to find the positions of optimal matching */
147 147
 	if(positions){
148
+		int match_required = 0;
148 149
 		for(int i = n-1, j = m-1; i >= 0; i--){
149 150
 			for(; j >= 0; j--){
150 151
 				/*
... ...
@@ -155,7 +156,12 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
155 156
 				 * we encounter, the latest in the candidate
156 157
 				 * string.
157 158
 				 */
158
-				if(D[i][j] != SCORE_MIN && D[i][j] == M[i][j]){
159
+				if(D[i][j] != SCORE_MIN && (match_required || D[i][j] == M[i][j])){
160
+					/* If this score was determined using
161
+					 * SCORE_MATCH_CONSECUTIVE, the
162
+					 * previous character MUST be a match
163
+					 */
164
+					match_required = i && j && M[i][j] == D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE;
159 165
 					positions[i] = j--;
160 166
 					break;
161 167
 				}
Browse code

We can use D[][] to test if there was a match

John Hawthorn authored on 31/08/2014 02:31:16
Showing 1 changed files
... ...
@@ -155,7 +155,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
155 155
 				 * we encounter, the latest in the candidate
156 156
 				 * string.
157 157
 				 */
158
-				if(tolower(needle[i]) == tolower(haystack[j]) && D[i][j] == M[i][j]){
158
+				if(D[i][j] != SCORE_MIN && D[i][j] == M[i][j]){
159 159
 					positions[i] = j--;
160 160
 					break;
161 161
 				}
Browse code

D[0][0] need not be initialized to 0

John Hawthorn authored on 31/08/2014 02:25:44
Showing 1 changed files
... ...
@@ -111,7 +111,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
111 111
 
112 112
 	for(int i = 0; i < n; i++){
113 113
 		for(int j = 0; j < m; j++){
114
-			score_t score = (j || i) ? SCORE_MIN : 0;
114
+			score_t score = SCORE_MIN;
115 115
 			int match = tolower(needle[i]) == tolower(haystack[j]);
116 116
 			D[i][j] = SCORE_MIN;
117 117
 			if(match){
Browse code

Improve debugging output

John Hawthorn authored on 31/08/2014 02:11:55
Showing 1 changed files
... ...
@@ -23,9 +23,17 @@ typedef double score_t;
23 23
 #define SCORE_MIN -INFINITY
24 24
 
25 25
 /* print one of the internal matrices */
26
-void mat_print(score_t *mat, int n, int m){
26
+void mat_print(score_t *mat, const char *needle, const char *haystack){
27
+	int n = strlen(needle);
28
+	int m = strlen(haystack);
27 29
 	int i, j;
30
+	fprintf(stderr, "    ");
31
+	for(j = 0; j < m; j++){
32
+		fprintf(stderr, "     %c", haystack[j]);
33
+	}
34
+	fprintf(stderr, "\n");
28 35
 	for(i = 0; i < n; i++){
36
+		fprintf(stderr, " %c |", needle[i]);
29 37
 		for(j = 0; j < m; j++){
30 38
 			score_t val = mat[i*m + j];
31 39
 			if(val == SCORE_MIN){
... ...
@@ -129,10 +137,10 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
129 137
 	}
130 138
 
131 139
 #if 0
132
-	printf("\"%s\" =~ \"%s\"\n", needle, haystack);
133
-	mat_print(&D[0][0], n, m);
134
-	mat_print(&M[0][0], n, m);
135
-	printf("\n");
140
+	fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack);
141
+	mat_print(&D[0][0], needle, haystack);
142
+	mat_print(&M[0][0], needle, haystack);
143
+	fprintf(stderr, "\n");
136 144
 #endif
137 145
 
138 146
 	/* backtrace to find the positions of optimal matching */
Browse code

Really long candidates should return SCORE_MIN

John Hawthorn authored on 17/08/2014 04:07:50
Showing 1 changed files
... ...
@@ -52,7 +52,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
52 52
 		 * If it is a valid match it will still be returned, it will
53 53
 		 * just be ranked below any reasonably sized candidates
54 54
 		 */
55
-		return 0;
55
+		return SCORE_MIN;
56 56
 	}
57 57
 
58 58
 	score_t match_bonus[m];
Browse code

No need to scale score in 0..1

Change-Id: Iea655e766abdaec8e7f3e2c3aa5d23274636cfb9

John Hawthorn authored on 02/08/2014 02:34:41
Showing 1 changed files
... ...
@@ -155,7 +155,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
155 155
 		}
156 156
 	}
157 157
 
158
-	return (float)(M[n-1][m-1]) / (float)(n * 2 + 1);
158
+	return M[n-1][m-1];
159 159
 }
160 160
 
161 161
 double match_positions(const char *needle, const char *haystack, size_t *positions){
Browse code

Only M[0][0] should default to 0

John Hawthorn authored on 04/08/2014 06:55:33
Showing 1 changed files
... ...
@@ -103,7 +103,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
103 103
 
104 104
 	for(int i = 0; i < n; i++){
105 105
 		for(int j = 0; j < m; j++){
106
-			score_t score = j ? SCORE_MIN : 0;
106
+			score_t score = (j || i) ? SCORE_MIN : 0;
107 107
 			int match = tolower(needle[i]) == tolower(haystack[j]);
108 108
 			D[i][j] = SCORE_MIN;
109 109
 			if(match){
Browse code

Use SCORE_MATCH_SLASH for first character

John Hawthorn authored on 04/08/2014 02:17:32
Showing 1 changed files
... ...
@@ -81,7 +81,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
81 81
 
82 82
 		score_t score = 0;
83 83
 		if(isalnum(ch)){
84
-			if(last_ch == '/'){
84
+			if(!last_ch || last_ch == '/'){
85 85
 				score = SCORE_MATCH_SLASH;
86 86
 			}else if(last_ch == '-' ||
87 87
 					last_ch == '_' ||
Browse code

Lesser penalty for leading and trailing gaps

John Hawthorn authored on 30/07/2014 11:18:47
Showing 1 changed files
... ...
@@ -29,9 +29,9 @@ void mat_print(score_t *mat, int n, int m){
29 29
 		for(j = 0; j < m; j++){
30 30
 			score_t val = mat[i*m + j];
31 31
 			if(val == SCORE_MIN){
32
-				fprintf(stderr, " -inf");
32
+				fprintf(stderr, "  -inf");
33 33
 			}else{
34
-				fprintf(stderr, " %.2f", val);
34
+				fprintf(stderr, " % .2f", val);
35 35
 			}
36 36
 		}
37 37
 		fprintf(stderr, "\n");
... ...
@@ -65,8 +65,8 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
65 65
 	 * M[][] Stores the best possible score at this position.
66 66
 	 */
67 67
 
68
-#define SCORE_GAP_LEADING      -0.01
69
-#define SCORE_GAP_TRAILING     -0.01
68
+#define SCORE_GAP_LEADING      -0.005
69
+#define SCORE_GAP_TRAILING     -0.005
70 70
 #define SCORE_GAP_INNER        -0.01
71 71
 #define SCORE_MATCH_CONSECUTIVE 1.0
72 72
 #define SCORE_MATCH_SLASH       1.5
... ...
@@ -129,8 +129,10 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
129 129
 	}
130 130
 
131 131
 #if 0
132
+	printf("\"%s\" =~ \"%s\"\n", needle, haystack);
132 133
 	mat_print(&D[0][0], n, m);
133 134
 	mat_print(&M[0][0], n, m);
135
+	printf("\n");
134 136
 #endif
135 137
 
136 138
 	/* backtrace to find the positions of optimal matching */
Browse code

Move scoring magic numbers into macros

John Hawthorn authored on 30/07/2014 05:05:15
Showing 1 changed files
... ...
@@ -65,6 +65,15 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
65 65
 	 * M[][] Stores the best possible score at this position.
66 66
 	 */
67 67
 
68
+#define SCORE_GAP_LEADING      -0.01
69
+#define SCORE_GAP_TRAILING     -0.01
70
+#define SCORE_GAP_INNER        -0.01
71
+#define SCORE_MATCH_CONSECUTIVE 1.0
72
+#define SCORE_MATCH_SLASH       1.5
73
+#define SCORE_MATCH_WORD        1.2
74
+#define SCORE_MATCH_CAPITAL     1.1
75
+#define SCORE_MATCH_DOT         0.8
76
+
68 77
 	/* Which positions are beginning of words */
69 78
 	char last_ch = '\0';
70 79
 	for(int i = 0; i < m; i++){
... ...
@@ -73,18 +82,18 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
73 82
 		score_t score = 0;
74 83
 		if(isalnum(ch)){
75 84
 			if(last_ch == '/'){
76
-				score = 1.5;
85
+				score = SCORE_MATCH_SLASH;
77 86
 			}else if(last_ch == '-' ||
78 87
 					last_ch == '_' ||
79 88
 					last_ch == ' ' ||
80 89
 					(last_ch >= '0' && last_ch <= '9')){
81
-				score = 1.2;
90
+				score = SCORE_MATCH_WORD;
82 91
 			}else if(last_ch >= 'a' && last_ch <= 'z' &&
83 92
 					ch >= 'A' && ch <= 'Z'){
84 93
 				/* CamelCase */
85
-				score = 1.1;
94
+				score = SCORE_MATCH_CAPITAL;
86 95
 			}else if(last_ch == '.'){
87
-				score = 0.8;
96
+				score = SCORE_MATCH_DOT;
88 97
 			}
89 98
 		}
90 99
 
... ...
@@ -102,14 +111,18 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
102 111
 					score = max(score, M[i-1][j-1] + match_bonus[j]);
103 112
 
104 113
 					/* consecutive match, doesn't stack with match_bonus */
105
-					score = max(score, D[i-1][j-1] + 1.0);
114
+					score = max(score, D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE);
106 115
 				}else if(!i){
107
-					score = (j * -0.01) + match_bonus[j];
116
+					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
108 117
 				}
109 118
 				D[i][j] = score;
110 119
 			}
111 120
 			if(j){
112
-				score = max(score, M[i][j-1] - 0.01);
121
+				if(i == n-1){
122
+					score = max(score, M[i][j-1] + SCORE_GAP_TRAILING);
123
+				}else{
124
+					score = max(score, M[i][j-1] + SCORE_GAP_INNER);
125
+				}
113 126
 			}
114 127
 			M[i][j] = score;
115 128
 		}
Browse code

Improve scoring

John Hawthorn authored on 30/07/2014 04:28:32
Showing 1 changed files
... ...
@@ -3,6 +3,7 @@
3 3
 #include <strings.h>
4 4
 #include <stdio.h>
5 5
 #include <float.h>
6
+#include <math.h>
6 7
 
7 8
 #include "fzy.h"
8 9
 
... ...
@@ -18,8 +19,8 @@ int has_match(const char *needle, const char *haystack){
18 19
 
19 20
 #define max(a, b) (((a) > (b)) ? (a) : (b))
20 21
 typedef double score_t;
21
-#define SCORE_MAX DBL_MAX
22
-#define SCORE_MIN -DBL_MAX
22
+#define SCORE_MAX INFINITY
23
+#define SCORE_MIN -INFINITY
23 24
 
24 25
 /* print one of the internal matrices */
25 26
 void mat_print(score_t *mat, int n, int m){
... ...
@@ -65,34 +66,52 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
65 66
 	 */
66 67
 
67 68
 	/* Which positions are beginning of words */
68
-	int at_bow = 1;
69 69
 	char last_ch = '\0';
70 70
 	for(int i = 0; i < m; i++){
71 71
 		char ch = haystack[i];
72
-		/* TODO: What about allcaps (ex. README) */
73
-		int bow = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch));
74
-		at_bow = !isalnum(ch);
75
-		last_ch = ch;
76 72
 
77
-		match_bonus[i] = bow ? 1.5 : 0;
73
+		score_t score = 0;
74
+		if(isalnum(ch)){
75
+			if(last_ch == '/'){
76
+				score = 1.5;
77
+			}else if(last_ch == '-' ||
78
+					last_ch == '_' ||
79
+					last_ch == ' ' ||
80
+					(last_ch >= '0' && last_ch <= '9')){
81
+				score = 1.2;
82
+			}else if(last_ch >= 'a' && last_ch <= 'z' &&
83
+					ch >= 'A' && ch <= 'Z'){
84
+				/* CamelCase */
85
+				score = 1.1;
86
+			}else if(last_ch == '.'){
87
+				score = 0.8;
88
+			}
89
+		}
90
+
91
+		match_bonus[i] = score;
92
+		last_ch = ch;
78 93
 	}
79 94
 
80 95
 	for(int i = 0; i < n; i++){
81 96
 		for(int j = 0; j < m; j++){
82
-			D[i][j] = SCORE_MIN;
97
+			score_t score = j ? SCORE_MIN : 0;
83 98
 			int match = tolower(needle[i]) == tolower(haystack[j]);
99
+			D[i][j] = SCORE_MIN;
84 100
 			if(match){
85
-				score_t score = 0;
86 101
 				if(i && j){
87
-					score = M[i-1][j-1] + match_bonus[j];
102
+					score = max(score, M[i-1][j-1] + match_bonus[j]);
88 103
 
89 104
 					/* consecutive match, doesn't stack with match_bonus */
90
-					score = max(score, 1 + D[i-1][j-1]);
105
+					score = max(score, D[i-1][j-1] + 1.0);
106
+				}else if(!i){
107
+					score = (j * -0.01) + match_bonus[j];
91 108
 				}
92
-				M[i][j] = D[i][j] = score;
109
+				D[i][j] = score;
110
+			}
111
+			if(j){
112
+				score = max(score, M[i][j-1] - 0.01);
93 113
 			}
94
-			if(j)
95
-				M[i][j] = max(M[i][j], M[i][j-1] - 0.05);
114
+			M[i][j] = score;
96 115
 		}
97 116
 	}
98 117
 
... ...
@@ -113,8 +132,8 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
113 132
 				 * we encounter, the latest in the candidate
114 133
 				 * string.
115 134
 				 */
116
-				if(D[i][j] == M[i][j]){
117
-					positions[i] = j;
135
+				if(tolower(needle[i]) == tolower(haystack[j]) && D[i][j] == M[i][j]){
136
+					positions[i] = j--;
118 137
 					break;
119 138
 				}
120 139
 			}
Browse code

match: Use array storing bonus for match of char

John Hawthorn authored on 27/07/2014 22:32:22
Showing 1 changed files
... ...
@@ -54,7 +54,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
54 54
 		return 0;
55 55
 	}
56 56
 
57
-	int bow[m];
57
+	score_t match_bonus[m];
58 58
 	score_t D[n][m], M[n][m];
59 59
 	bzero(D, sizeof(D));
60 60
 	bzero(M, sizeof(M));
... ...
@@ -70,9 +70,11 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
70 70
 	for(int i = 0; i < m; i++){
71 71
 		char ch = haystack[i];
72 72
 		/* TODO: What about allcaps (ex. README) */
73
-		bow[i] = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch));
73
+		int bow = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch));
74 74
 		at_bow = !isalnum(ch);
75 75
 		last_ch = ch;
76
+
77
+		match_bonus[i] = bow ? 1.5 : 0;
76 78
 	}
77 79
 
78 80
 	for(int i = 0; i < n; i++){
... ...
@@ -81,12 +83,12 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
81 83
 			int match = tolower(needle[i]) == tolower(haystack[j]);
82 84
 			if(match){
83 85
 				score_t score = 0;
84
-				if(i && j)
85
-					score = M[i-1][j-1];
86
-				if(bow[j])
87
-					score += 1.5;
88
-				else if(i && j && D[i-1][j-1])
86
+				if(i && j){
87
+					score = M[i-1][j-1] + match_bonus[j];
88
+
89
+					/* consecutive match, doesn't stack with match_bonus */
89 90
 					score = max(score, 1 + D[i-1][j-1]);
91
+				}
90 92
 				M[i][j] = D[i][j] = score;
91 93
 			}
92 94
 			if(j)
Browse code

debug: fix mat_print

John Hawthorn authored on 27/07/2014 05:53:33
Showing 1 changed files
... ...
@@ -16,23 +16,28 @@ int has_match(const char *needle, const char *haystack){
16 16
 	return 1;
17 17
 }
18 18
 
19
+#define max(a, b) (((a) > (b)) ? (a) : (b))
20
+typedef double score_t;
21
+#define SCORE_MAX DBL_MAX
22
+#define SCORE_MIN -DBL_MAX
23
+
19 24
 /* print one of the internal matrices */
20
-void mat_print(int *mat, int n, int m){
25
+void mat_print(score_t *mat, int n, int m){
21 26
 	int i, j;
22 27
 	for(i = 0; i < n; i++){
23 28
 		for(j = 0; j < m; j++){
24
-			fprintf(stderr, " %3i", mat[i*m + j]);
29
+			score_t val = mat[i*m + j];
30
+			if(val == SCORE_MIN){
31
+				fprintf(stderr, " -inf");
32
+			}else{
33
+				fprintf(stderr, " %.2f", val);
34
+			}
25 35
 		}
26 36
 		fprintf(stderr, "\n");
27 37
 	}
28 38
 	fprintf(stderr, "\n\n");
29 39
 }
30 40
 
31
-#define max(a, b) (((a) > (b)) ? (a) : (b))
32
-typedef double score_t;
33
-#define SCORE_MAX DBL_MAX
34
-#define SCORE_MIN -DBL_MAX
35
-
36 41
 double calculate_score(const char *needle, const char *haystack, size_t *positions){
37 42
 	if(!*haystack || !*needle)
38 43
 		return SCORE_MIN;
... ...
@@ -89,6 +94,11 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
89 94
 		}
90 95
 	}
91 96
 
97
+#if 0
98
+	mat_print(&D[0][0], n, m);
99
+	mat_print(&M[0][0], n, m);
100
+#endif
101
+
92 102
 	/* backtrace to find the positions of optimal matching */
93 103
 	if(positions){
94 104
 		for(int i = n-1, j = m-1; i >= 0; i--){
Browse code

Adjust scoring system

-0.05 for a skipped character in the candidate.
+1 for a match following a previous match
+1.5 for a match at the beginning of a word
No change for any other match.

John Hawthorn authored on 27/07/2014 05:38:37
Showing 1 changed files
... ...
@@ -29,7 +29,7 @@ void mat_print(int *mat, int n, int m){
29 29
 }
30 30
 
31 31
 #define max(a, b) (((a) > (b)) ? (a) : (b))
32
-typedef int score_t;
32
+typedef double score_t;
33 33
 #define SCORE_MAX DBL_MAX
34 34
 #define SCORE_MIN -DBL_MAX
35 35
 
... ...
@@ -50,7 +50,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
50 50
 	}
51 51
 
52 52
 	int bow[m];
53
-	int D[n][m], M[n][m];
53
+	score_t D[n][m], M[n][m];
54 54
 	bzero(D, sizeof(D));
55 55
 	bzero(M, sizeof(M));
56 56
 
... ...
@@ -72,39 +72,38 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
72 72
 
73 73
 	for(int i = 0; i < n; i++){
74 74
 		for(int j = 0; j < m; j++){
75
+			D[i][j] = SCORE_MIN;
75 76
 			int match = tolower(needle[i]) == tolower(haystack[j]);
76 77
 			if(match){
77 78
 				score_t score = 0;
78 79
 				if(i && j)
79 80
 					score = M[i-1][j-1];
80 81
 				if(bow[j])
81
-					score += 2;
82
+					score += 1.5;
82 83
 				else if(i && j && D[i-1][j-1])
83 84
 					score = max(score, 1 + D[i-1][j-1]);
84 85
 				M[i][j] = D[i][j] = score;
85 86
 			}
86 87
 			if(j)
87
-				M[i][j] = max(M[i][j], M[i][j-1]);
88
+				M[i][j] = max(M[i][j], M[i][j-1] - 0.05);
88 89
 		}
89 90
 	}
90 91
 
91 92
 	/* backtrace to find the positions of optimal matching */
92 93
 	if(positions){
93 94
 		for(int i = n-1, j = m-1; i >= 0; i--){
94
-			int last = M[i][j];
95
-			for(; j >= 0 && M[i][j] == last; j--){
95
+			for(; j >= 0; j--){
96 96
 				/*
97 97
 				 * There may be multiple paths which result in
98 98
 				 * the optimal weight.
99 99
 				 *
100
-				 * Since we don't exit the loop on the first
101
-				 * match, positions[i] may be assigned to
102
-				 * multiple times. Since we are decrementing i
103
-				 * and j, this favours the optimal path
104
-				 * occurring earlier in the string.
100
+				 * For simplicity, we will pick the first one
101
+				 * we encounter, the latest in the candidate
102
+				 * string.
105 103
 				 */
106
-				if(tolower(needle[i]) == tolower(haystack[j])){
104
+				if(D[i][j] == M[i][j]){
107 105
 					positions[i] = j;
106
+					break;
108 107
 				}
109 108
 			}
110 109
 		}
Browse code

Better values for SCORE_{MIN,MAX}

SCORE_MAX is now defined as DBL_MAX
SCORE_MIN is now defined as -DBL_MAX

John Hawthorn authored on 27/07/2014 02:59:27
Showing 1 changed files
... ...
@@ -2,11 +2,10 @@
2 2
 #include <string.h>
3 3
 #include <strings.h>
4 4
 #include <stdio.h>
5
+#include <float.h>
5 6
 
6 7
 #include "fzy.h"
7 8
 
8
-#define SCORE_MIN -1
9
-
10 9
 int has_match(const char *needle, const char *haystack){
11 10
 	while(*needle){
12 11
 		if(!*haystack)
... ...
@@ -31,6 +30,8 @@ void mat_print(int *mat, int n, int m){
31 30
 
32 31
 #define max(a, b) (((a) > (b)) ? (a) : (b))
33 32
 typedef int score_t;
33
+#define SCORE_MAX DBL_MAX
34
+#define SCORE_MIN -DBL_MAX
34 35
 
35 36
 double calculate_score(const char *needle, const char *haystack, size_t *positions){
36 37
 	if(!*haystack || !*needle)
... ...
@@ -114,7 +115,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
114 115
 
115 116
 double match_positions(const char *needle, const char *haystack, size_t *positions){
116 117
 	if(!*needle){
117
-		return 1.0;
118
+		return SCORE_MAX;
118 119
 	}else if(!has_match(needle, haystack)){
119 120
 		return SCORE_MIN;
120 121
 	}else if(!strcasecmp(needle, haystack)){
... ...
@@ -123,7 +124,7 @@ double match_positions(const char *needle, const char *haystack, size_t *positio
123 124
 			for(int i = 0; i < n; i++)
124 125
 				positions[i] = i;
125 126
 		}
126
-		return 1.0;
127
+		return SCORE_MAX;
127 128
 	}else{
128 129
 		return calculate_score(needle, haystack, positions);
129 130
 	}
Browse code

Don't require scores to be positive

Previously a successful match was determined by the score being
positive. Now we will use has_match instead.

John Hawthorn authored on 26/07/2014 09:44:12
Showing 1 changed files
... ...
@@ -7,7 +7,7 @@
7 7
 
8 8
 #define SCORE_MIN -1
9 9
 
10
-static int is_subset(const char *needle, const char *haystack){
10
+int has_match(const char *needle, const char *haystack){
11 11
 	while(*needle){
12 12
 		if(!*haystack)
13 13
 			return 0;
... ...
@@ -115,7 +115,7 @@ double calculate_score(const char *needle, const char *haystack, size_t *positio
115 115
 double match_positions(const char *needle, const char *haystack, size_t *positions){
116 116
 	if(!*needle){
117 117
 		return 1.0;
118
-	}else if(!is_subset(needle, haystack)){
118
+	}else if(!has_match(needle, haystack)){
119 119
 		return SCORE_MIN;
120 120
 	}else if(!strcasecmp(needle, haystack)){
121 121
 		if(positions){
Browse code

Move declarations into fzy.h

John Hawthorn authored on 26/07/2014 09:37:24
Showing 1 changed files
... ...
@@ -3,6 +3,8 @@
3 3
 #include <strings.h>
4 4
 #include <stdio.h>
5 5
 
6
+#include "fzy.h"
7
+
6 8
 #define SCORE_MIN -1
7 9
 
8 10
 static int is_subset(const char *needle, const char *haystack){
Browse code

Highlight matched characters

John Hawthorn authored on 26/07/2014 09:27:31
Showing 1 changed files
... ...
@@ -30,7 +30,7 @@ void mat_print(int *mat, int n, int m){
30 30
 #define max(a, b) (((a) > (b)) ? (a) : (b))
31 31
 typedef int score_t;
32 32
 
33
-double calculate_score(const char *needle, const char *haystack){
33
+double calculate_score(const char *needle, const char *haystack, size_t *positions){
34 34
 	if(!*haystack || !*needle)
35 35
 		return SCORE_MIN;
36 36
 
... ...
@@ -85,17 +85,48 @@ double calculate_score(const char *needle, const char *haystack){
85 85
 		}
86 86
 	}
87 87
 
88
+	/* backtrace to find the positions of optimal matching */
89
+	if(positions){
90
+		for(int i = n-1, j = m-1; i >= 0; i--){
91
+			int last = M[i][j];
92
+			for(; j >= 0 && M[i][j] == last; j--){
93
+				/*
94
+				 * There may be multiple paths which result in
95
+				 * the optimal weight.
96
+				 *
97
+				 * Since we don't exit the loop on the first
98
+				 * match, positions[i] may be assigned to
99
+				 * multiple times. Since we are decrementing i
100
+				 * and j, this favours the optimal path
101
+				 * occurring earlier in the string.
102
+				 */
103
+				if(tolower(needle[i]) == tolower(haystack[j])){
104
+					positions[i] = j;
105
+				}
106
+			}
107
+		}
108
+	}
109
+
88 110
 	return (float)(M[n-1][m-1]) / (float)(n * 2 + 1);
89 111
 }
90 112
 
91
-double match(const char *needle, const char *haystack){
113
+double match_positions(const char *needle, const char *haystack, size_t *positions){
92 114
 	if(!*needle){
93 115
 		return 1.0;
94 116
 	}else if(!is_subset(needle, haystack)){
95 117
 		return SCORE_MIN;
96 118
 	}else if(!strcasecmp(needle, haystack)){
119
+		if(positions){
120
+			int n = strlen(needle);
121
+			for(int i = 0; i < n; i++)
122
+				positions[i] = i;
123
+		}
97 124
 		return 1.0;
98 125
 	}else{
99
-		return calculate_score(needle, haystack);
126
+		return calculate_score(needle, haystack, positions);
100 127
 	}
101 128
 }
129
+
130
+double match(const char *needle, const char *haystack){
131
+	return match_positions(needle, haystack, NULL);
132
+}
Browse code

Don't treat consecitive capitals as BOW

Only treat the first letter of an ALLCAPS word as the biginning of that
word (it is not CamelCase).

John Hawthorn authored on 18/07/2014 04:48:06
Showing 1 changed files
... ...
@@ -58,11 +58,13 @@ double calculate_score(const char *needle, const char *haystack){
58 58
 
59 59
 	/* Which positions are beginning of words */
60 60
 	int at_bow = 1;
61
+	char last_ch = '\0';
61 62
 	for(int i = 0; i < m; i++){
62 63
 		char ch = haystack[i];
63 64
 		/* TODO: What about allcaps (ex. README) */
64
-		bow[i] = (at_bow && isalnum(ch)) || isupper(ch);
65
+		bow[i] = (at_bow && isalnum(ch)) || (isupper(ch) && !isupper(last_ch));
65 66
 		at_bow = !isalnum(ch);
67
+		last_ch = ch;
66 68
 	}
67 69
 
68 70
 	for(int i = 0; i < n; i++){
Browse code

Skip calculate_score for long candidates

John Hawthorn authored on 15/07/2014 07:37:47
Showing 1 changed files
... ...
@@ -31,9 +31,21 @@ void mat_print(int *mat, int n, int m){
31 31
 typedef int score_t;
32 32
 
33 33
 double calculate_score(const char *needle, const char *haystack){
34
+	if(!*haystack || !*needle)
35
+		return SCORE_MIN;
36
+
34 37
 	int n = strlen(needle);
35 38
 	int m = strlen(haystack);
36 39
 
40
+	if(m > 1024){
41
+		/*
42
+		 * Unreasonably large candidate: return no score
43
+		 * If it is a valid match it will still be returned, it will
44
+		 * just be ranked below any reasonably sized candidates
45
+		 */
46
+		return 0;
47
+	}
48
+
37 49
 	int bow[m];
38 50
 	int D[n][m], M[n][m];
39 51
 	bzero(D, sizeof(D));
Browse code

define SCORE_MIN -1

John Hawthorn authored on 15/07/2014 07:30:35
Showing 1 changed files
... ...
@@ -3,6 +3,8 @@
3 3
 #include <strings.h>
4 4
 #include <stdio.h>
5 5
 
6
+#define SCORE_MIN -1
7
+
6 8
 static int is_subset(const char *needle, const char *haystack){
7 9
 	while(*needle){
8 10
 		if(!*haystack)
... ...
@@ -76,7 +78,7 @@ double match(const char *needle, const char *haystack){
76 78
 	if(!*needle){
77 79
 		return 1.0;
78 80
 	}else if(!is_subset(needle, haystack)){
79
-		return -1.0;
81
+		return SCORE_MIN;
80 82
 	}else if(!strcasecmp(needle, haystack)){
81 83
 		return 1.0;
82 84
 	}else{
Browse code

Fix warnings

John Hawthorn authored on 13/07/2014 05:16:13
Showing 1 changed files
... ...
@@ -18,7 +18,7 @@ void mat_print(int *mat, int n, int m){
18 18
 	int i, j;
19 19
 	for(i = 0; i < n; i++){
20 20
 		for(j = 0; j < m; j++){
21
-			fprintf(stderr, " %3zd", mat[i*m + j]);
21
+			fprintf(stderr, " %3i", mat[i*m + j]);
22 22
 		}
23 23
 		fprintf(stderr, "\n");
24 24
 	}
Browse code

New DP algorithm match scoring algorithm

John Hawthorn authored on 13/07/2014 00:45:07
Showing 1 changed files
... ...
@@ -1,5 +1,7 @@
1 1
 #include <ctype.h>
2 2
 #include <string.h>
3
+#include <strings.h>
4
+#include <stdio.h>
3 5
 
4 6
 static int is_subset(const char *needle, const char *haystack){
5 7
 	while(*needle){
... ...
@@ -11,14 +13,73 @@ static int is_subset(const char *needle, const char *haystack){
11 13
 	return 1;
12 14
 }
13 15
 
16
+/* print one of the internal matrices */
17
+void mat_print(int *mat, int n, int m){
18
+	int i, j;
19
+	for(i = 0; i < n; i++){
20
+		for(j = 0; j < m; j++){
21
+			fprintf(stderr, " %3zd", mat[i*m + j]);
22
+		}
23
+		fprintf(stderr, "\n");
24
+	}
25
+	fprintf(stderr, "\n\n");
26
+}
27
+
28
+#define max(a, b) (((a) > (b)) ? (a) : (b))
29
+typedef int score_t;
30
+
31
+double calculate_score(const char *needle, const char *haystack){
32
+	int n = strlen(needle);
33
+	int m = strlen(haystack);
34
+
35
+	int bow[m];
36
+	int D[n][m], M[n][m];
37
+	bzero(D, sizeof(D));
38
+	bzero(M, sizeof(M));
39
+
40
+	/*
41
+	 * D[][] Stores the best score for this position ending with a match.
42
+	 * M[][] Stores the best possible score at this position.
43
+	 */
44
+
45
+	/* Which positions are beginning of words */
46
+	int at_bow = 1;
47
+	for(int i = 0; i < m; i++){
48
+		char ch = haystack[i];
49
+		/* TODO: What about allcaps (ex. README) */
50
+		bow[i] = (at_bow && isalnum(ch)) || isupper(ch);
51
+		at_bow = !isalnum(ch);
52
+	}
53
+
54
+	for(int i = 0; i < n; i++){
55
+		for(int j = 0; j < m; j++){
56
+			int match = tolower(needle[i]) == tolower(haystack[j]);
57
+			if(match){
58
+				score_t score = 0;
59
+				if(i && j)
60
+					score = M[i-1][j-1];
61
+				if(bow[j])
62
+					score += 2;
63
+				else if(i && j && D[i-1][j-1])
64
+					score = max(score, 1 + D[i-1][j-1]);
65
+				M[i][j] = D[i][j] = score;
66
+			}
67
+			if(j)
68
+				M[i][j] = max(M[i][j], M[i][j-1]);
69
+		}
70
+	}
71
+
72
+	return (float)(M[n-1][m-1]) / (float)(n * 2 + 1);
73
+}
74
+
14 75
 double match(const char *needle, const char *haystack){
15 76
 	if(!*needle){
16 77
 		return 1.0;
17 78
 	}else if(!is_subset(needle, haystack)){
18
-		return 0.0;
79
+		return -1.0;
19 80
 	}else if(!strcasecmp(needle, haystack)){
20 81
 		return 1.0;
21 82
 	}else{
22
-		return 0.9;
83
+		return calculate_score(needle, haystack);
23 84
 	}
24 85
 }
Browse code

Add some special cases

John Hawthorn authored on 12/07/2014 22:22:44
Showing 1 changed files
... ...
@@ -1,4 +1,5 @@
1 1
 #include <ctype.h>
2
+#include <string.h>
2 3
 
3 4
 static int is_subset(const char *needle, const char *haystack){
4 5
 	while(*needle){
... ...
@@ -11,8 +12,13 @@ static int is_subset(const char *needle, const char *haystack){
11 12
 }
12 13
 
13 14
 double match(const char *needle, const char *haystack){
14
-	if(!is_subset(needle, haystack)){
15
+	if(!*needle){
16
+		return 1.0;
17
+	}else if(!is_subset(needle, haystack)){
15 18
 		return 0.0;
19
+	}else if(!strcasecmp(needle, haystack)){
20
+		return 1.0;
21
+	}else{
22
+		return 0.9;
16 23
 	}
17
-	return 1.0;
18 24
 }
Browse code

Refactor into is_subset function

John Hawthorn authored on 12/07/2014 22:18:56
Showing 1 changed files
... ...
@@ -1,12 +1,18 @@
1 1
 #include <ctype.h>
2 2
 
3
-double match(const char *needle, const char *haystack){
3
+static int is_subset(const char *needle, const char *haystack){
4 4
 	while(*needle){
5 5
 		if(!*haystack)
6
-			return 0.0;
7
-		while(*haystack && tolower(*needle) == tolower(*haystack++)){
6
+			return 0;
7
+		while(*haystack && tolower(*needle) == tolower(*haystack++))
8 8
 			needle++;
9
-		}
9
+	}
10
+	return 1;
11
+}
12
+
13
+double match(const char *needle, const char *haystack){
14
+	if(!is_subset(needle, haystack)){
15
+		return 0.0;
10 16
 	}
11 17
 	return 1.0;
12 18
 }
Browse code

Add tests and split matching into match.c

John Hawthorn authored on 12/07/2014 22:07:22
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,12 @@
1
+#include <ctype.h>
2
+
3
+double match(const char *needle, const char *haystack){
4
+	while(*needle){
5
+		if(!*haystack)
6
+			return 0.0;
7
+		while(*haystack && tolower(*needle) == tolower(*haystack++)){
8
+			needle++;
9
+		}
10
+	}
11
+	return 1.0;
12
+}