Browse code

Ignore escape sequences in has_match

No scoring code is changed, so this probably needs more work to be fully
functional.

Robert Cranston authored on 03/12/2025 12:25:52
Showing 1 changed files
... ...
@@ -11,19 +11,83 @@
11 11
 
12 12
 #include "../config.h"
13 13
 
14
+const char *ignore_escapes(const char *haystack) {
15
+	enum {
16
+		state_default,
17
+		state_escaped,
18
+		state_in_csi,
19
+		state_start_osc,
20
+		state_in_osc,
21
+		state_ignore_next,
22
+	} state = state_default;
23
+	do switch (state) {
24
+		case state_default: switch (*haystack) {
25
+			case '\x1b':
26
+				state = state_escaped; break;
27
+			case '\x7':
28
+				return ++haystack;
29
+			default:
30
+				return haystack;
31
+		}; break;
32
+		case state_escaped: switch (*haystack) {
33
+			case '[':
34
+				state = state_in_csi; break;
35
+			case ']':
36
+				state = state_start_osc; break;
37
+			case '%': case '(': case ')': case '#':
38
+			case '0': case '3': case '5': case '6':
39
+				state = state_ignore_next; break;
40
+			case '<': case '=': case '>': case '\x7':
41
+			case '1': case '2': case '7': case '8':
42
+			case 'c': case 's': case 'u':
43
+			case 'A': case 'B': case 'C': case 'D': case 'E':
44
+			case 'H': case 'I': case 'J': case 'K': case 'M':
45
+			case 'N': case 'O': case 'S': case 'T': case 'Z':
46
+				return ++haystack;
47
+			default:
48
+				return haystack;
49
+		}; break;
50
+		case state_in_csi: switch (*haystack) {
51
+			case ';': case '?':
52
+			case '0': case '1': case '2': case '3': case '4':
53
+			case '5': case '6': case '7': case '8': case '9':
54
+				break;
55
+			default:
56
+				return ++haystack;
57
+		}; break;
58
+		case state_start_osc: switch (*haystack) {
59
+			case '0': case '1': case '2': case '3': case '4':
60
+			case '5': case '6': case '7': case '8': case '9':
61
+				state = state_in_osc; break;
62
+			default:
63
+				return ++haystack;
64
+		}; break;
65
+		case state_in_osc: switch (*haystack) {
66
+			case '\x7':
67
+				return ++haystack;
68
+			case '\x1b':
69
+				state = state_ignore_next; break;
70
+		}; break;
71
+		case state_ignore_next:
72
+			return ++haystack;
73
+	} while (*++haystack);
74
+	return haystack;
75
+}
76
+
14 77
 char *strcasechr(const char *s, char c) {
15 78
 	const char accept[3] = {c, toupper(c), 0};
16 79
 	return strpbrk(s, accept);
17 80
 }
18 81
 
19 82
 int has_match(const char *needle, const char *haystack) {
83
+	haystack = ignore_escapes(haystack);
20 84
 	while (*needle) {
21 85
 		char nch = *needle++;
22 86
 
23 87
 		if (!(haystack = strcasechr(haystack, nch))) {
24 88
 			return 0;
25 89
 		}
26
-		haystack++;
90
+		haystack = ignore_escapes(++haystack);
27 91
 	}
28 92
 	return 1;
29 93
 }
Browse code

Avoid possible compiler warning

John Hawthorn authored on 12/07/2025 05:00:43
Showing 1 changed files
... ...
@@ -79,7 +79,8 @@ static inline void match_row(const struct match_struct *match, int row, score_t
79 79
 	score_t prev_score = SCORE_MIN;
80 80
 	score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
81 81
 
82
-	score_t prev_M, prev_D;
82
+	/* These will not be used with this value, but not all compilers see it */
83
+	score_t prev_M = SCORE_MIN, prev_D = SCORE_MIN;
83 84
 
84 85
 	for (int j = 0; j < m; j++) {
85 86
 		if (lower_needle[i] == lower_haystack[j]) {
Browse code

Simplify Match Algorithm

Binh Tran authored on 18/01/2025 13:38:38
Showing 1 changed files
... ...
@@ -28,8 +28,6 @@ int has_match(const char *needle, const char *haystack) {
28 28
 	return 1;
29 29
 }
30 30
 
31
-#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)
32
-
33 31
 #define max(a, b) (((a) > (b)) ? (a) : (b))
34 32
 
35 33
 struct match_struct {
... ...
@@ -81,6 +79,8 @@ static inline void match_row(const struct match_struct *match, int row, score_t
81 79
 	score_t prev_score = SCORE_MIN;
82 80
 	score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
83 81
 
82
+	score_t prev_M, prev_D;
83
+
84 84
 	for (int j = 0; j < m; j++) {
85 85
 		if (lower_needle[i] == lower_haystack[j]) {
86 86
 			score_t score = SCORE_MIN;
... ...
@@ -88,14 +88,18 @@ static inline void match_row(const struct match_struct *match, int row, score_t
88 88
 				score = (j * SCORE_GAP_LEADING) + match_bonus[j];
89 89
 			} else if (j) { /* i > 0 && j > 0*/
90 90
 				score = max(
91
-						last_M[j - 1] + match_bonus[j],
91
+						prev_M + match_bonus[j],
92 92
 
93 93
 						/* consecutive match, doesn't stack with match_bonus */
94
-						last_D[j - 1] + SCORE_MATCH_CONSECUTIVE);
94
+						prev_D + SCORE_MATCH_CONSECUTIVE);
95 95
 			}
96
+			prev_D = last_D[j];
97
+			prev_M = last_M[j];
96 98
 			curr_D[j] = score;
97 99
 			curr_M[j] = prev_score = max(score, prev_score + gap_score);
98 100
 		} else {
101
+			prev_D = last_D[j];
102
+			prev_M = last_M[j];
99 103
 			curr_D[j] = SCORE_MIN;
100 104
 			curr_M[j] = prev_score = prev_score + gap_score;
101 105
 		}
... ...
@@ -131,24 +135,13 @@ score_t match(const char *needle, const char *haystack) {
131 135
 	 * D[][] Stores the best score for this position ending with a match.
132 136
 	 * M[][] Stores the best possible score at this position.
133 137
 	 */
134
-	score_t D[2][MATCH_MAX_LEN], M[2][MATCH_MAX_LEN];
135
-
136
-	score_t *last_D, *last_M;
137
-	score_t *curr_D, *curr_M;
138
-
139
-	last_D = D[0];
140
-	last_M = M[0];
141
-	curr_D = D[1];
142
-	curr_M = M[1];
138
+	score_t D[MATCH_MAX_LEN], M[MATCH_MAX_LEN];
143 139
 
144 140
 	for (int i = 0; i < n; i++) {
145
-		match_row(&match, i, curr_D, curr_M, last_D, last_M);
146
-
147
-		SWAP(curr_D, last_D, score_t *);
148
-		SWAP(curr_M, last_M, score_t *);
141
+		match_row(&match, i, D, M, D, M);
149 142
 	}
150 143
 
151
-	return last_M[m - 1];
144
+	return M[m - 1];
152 145
 }
153 146
 
154 147
 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
... ...
@@ -187,17 +180,9 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
187 180
 	M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
188 181
 	D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
189 182
 
190
-	score_t *last_D = NULL, *last_M = NULL;
191
-	score_t *curr_D, *curr_M;
192
-
193
-	for (int i = 0; i < n; i++) {
194
-		curr_D = &D[i][0];
195
-		curr_M = &M[i][0];
196
-
197
-		match_row(&match, i, curr_D, curr_M, last_D, last_M);
198
-
199
-		last_D = curr_D;
200
-		last_M = curr_M;
183
+	match_row(&match, 0, D[0], M[0], D[0], M[0]);
184
+	for (int i = 1; i < n; i++) {
185
+		match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]);
201 186
 	}
202 187
 
203 188
 	/* backtrace to find the positions of optimal matching */
Browse code

Initialize avoid uninitialized variable warning

These pointers were never dereferenced in their uninitialized form, as
that only happens for i > 0, but we should do this to avoid the compiler
warning.

John Hawthorn authored on 07/12/2024 06:16:45
Showing 1 changed files
... ...
@@ -187,7 +187,7 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
187 187
 	M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
188 188
 	D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
189 189
 
190
-	score_t *last_D, *last_M;
190
+	score_t *last_D = NULL, *last_M = NULL;
191 191
 	score_t *curr_D, *curr_M;
192 192
 
193 193
 	for (int i = 0; i < n; i++) {
Browse code

Check for too long haystack

Fixes #145

John Hawthorn authored on 09/08/2020 02:42:58
Showing 1 changed files
... ...
@@ -56,7 +56,7 @@ static void setup_match_struct(struct match_struct *match, const char *needle, c
56 56
 	match->needle_len = strlen(needle);
57 57
 	match->haystack_len = strlen(haystack);
58 58
 
59
-	if (match->needle_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) {
59
+	if (match->haystack_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) {
60 60
 		return;
61 61
 	}
62 62
 
Browse code

Avoid VLA in tty_interface

John Hawthorn authored on 28/12/2019 03:39:50
Showing 1 changed files
... ...
@@ -32,8 +32,6 @@ int has_match(const char *needle, const char *haystack) {
32 32
 
33 33
 #define max(a, b) (((a) > (b)) ? (a) : (b))
34 34
 
35
-#define MATCH_MAX_LEN 1024
36
-
37 35
 struct match_struct {
38 36
 	int needle_len;
39 37
 	int haystack_len;
Browse code

Remove strlen in precompute_bonus

Pretty sure this was optimized out anyways, but this is cleaner IMO.

John Hawthorn authored on 28/12/2019 03:09:11
Showing 1 changed files
... ...
@@ -46,9 +46,8 @@ struct match_struct {
46 46
 
47 47
 static void precompute_bonus(const char *haystack, score_t *match_bonus) {
48 48
 	/* Which positions are beginning of words */
49
-	int m = strlen(haystack);
50 49
 	char last_ch = '/';
51
-	for (int i = 0; i < m; i++) {
50
+	for (int i = 0; haystack[i]; i++) {
52 51
 		char ch = haystack[i];
53 52
 		match_bonus[i] = COMPUTE_BONUS(last_ch, ch);
54 53
 		last_ch = ch;
Browse code

Use malloc in match_positions to avoid VLA

John Hawthorn authored on 28/12/2019 02:43:46
Showing 1 changed files
... ...
@@ -186,7 +186,9 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
186 186
 	 * D[][] Stores the best score for this position ending with a match.
187 187
 	 * M[][] Stores the best possible score at this position.
188 188
 	 */
189
-	score_t D[n][m], M[n][m];
189
+	score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN];
190
+	M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
191
+	D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
190 192
 
191 193
 	score_t *last_D, *last_M;
192 194
 	score_t *curr_D, *curr_M;
... ...
@@ -230,5 +232,10 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
230 232
 		}
231 233
 	}
232 234
 
233
-	return M[n - 1][m - 1];
235
+	score_t result = M[n - 1][m - 1];
236
+
237
+	free(M);
238
+	free(D);
239
+
240
+	return result;
234 241
 }
Browse code

Remove DEBUG_VERBOSE

John Hawthorn authored on 28/12/2019 02:42:25
Showing 1 changed files
... ...
@@ -32,33 +32,6 @@ int has_match(const char *needle, const char *haystack) {
32 32
 
33 33
 #define max(a, b) (((a) > (b)) ? (a) : (b))
34 34
 
35
-#ifdef DEBUG_VERBOSE
36
-/* print one of the internal matrices */
37
-void mat_print(score_t *mat, char name, const char *needle, const char *haystack) {
38
-	int n = strlen(needle);
39
-	int m = strlen(haystack);
40
-	int i, j;
41
-	fprintf(stderr, "%c   ", name);
42
-	for (j = 0; j < m; j++) {
43
-		fprintf(stderr, "     %c", haystack[j]);
44
-	}
45
-	fprintf(stderr, "\n");
46
-	for (i = 0; i < n; i++) {
47
-		fprintf(stderr, " %c |", needle[i]);
48
-		for (j = 0; j < m; j++) {
49
-			score_t val = mat[i * m + j];
50
-			if (val == SCORE_MIN) {
51
-				fprintf(stderr, "    -\u221E");
52
-			} else {
53
-				fprintf(stderr, " %.3f", val);
54
-			}
55
-		}
56
-		fprintf(stderr, "\n");
57
-	}
58
-	fprintf(stderr, "\n\n");
59
-}
60
-#endif
61
-
62 35
 #define MATCH_MAX_LEN 1024
63 36
 
64 37
 struct match_struct {
... ...
@@ -228,13 +201,6 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
228 201
 		last_M = curr_M;
229 202
 	}
230 203
 
231
-#ifdef DEBUG_VERBOSE
232
-	fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack);
233
-	mat_print(&D[0][0], 'D', needle, haystack);
234
-	mat_print(&M[0][0], 'M', needle, haystack);
235
-	fprintf(stderr, "\n");
236
-#endif
237
-
238 204
 	/* backtrace to find the positions of optimal matching */
239 205
 	if (positions) {
240 206
 		int match_required = 0;
Browse code

Reduce memory and avoid VLA in match()

When we're matching without recording positions, we only need to keep
two rows in memory at a time.

John Hawthorn authored on 28/12/2019 02:39:01
Showing 1 changed files
... ...
@@ -28,6 +28,8 @@ int has_match(const char *needle, const char *haystack) {
28 28
 	return 1;
29 29
 }
30 30
 
31
+#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)
32
+
31 33
 #define max(a, b) (((a) > (b)) ? (a) : (b))
32 34
 
33 35
 #ifdef DEBUG_VERBOSE
... ...
@@ -159,22 +161,24 @@ score_t match(const char *needle, const char *haystack) {
159 161
 	 * D[][] Stores the best score for this position ending with a match.
160 162
 	 * M[][] Stores the best possible score at this position.
161 163
 	 */
162
-	score_t D[n][m], M[n][m];
164
+	score_t D[2][MATCH_MAX_LEN], M[2][MATCH_MAX_LEN];
163 165
 
164 166
 	score_t *last_D, *last_M;
165 167
 	score_t *curr_D, *curr_M;
166 168
 
167
-	for (int i = 0; i < n; i++) {
168
-		curr_D = &D[i][0];
169
-		curr_M = &M[i][0];
169
+	last_D = D[0];
170
+	last_M = M[0];
171
+	curr_D = D[1];
172
+	curr_M = M[1];
170 173
 
174
+	for (int i = 0; i < n; i++) {
171 175
 		match_row(&match, i, curr_D, curr_M, last_D, last_M);
172 176
 
173
-		last_D = curr_D;
174
-		last_M = curr_M;
177
+		SWAP(curr_D, last_D, score_t *);
178
+		SWAP(curr_M, last_M, score_t *);
175 179
 	}
176 180
 
177
-	return M[n - 1][m - 1];
181
+	return last_M[m - 1];
178 182
 }
179 183
 
180 184
 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
Browse code

Split match and match_postitions

John Hawthorn authored on 28/12/2019 02:35:49
Showing 1 changed files
... ...
@@ -4,6 +4,7 @@
4 4
 #include <stdio.h>
5 5
 #include <float.h>
6 6
 #include <math.h>
7
+#include <stdlib.h>
7 8
 
8 9
 #include "match.h"
9 10
 #include "bonus.h"
... ...
@@ -129,6 +130,53 @@ static inline void match_row(const struct match_struct *match, int row, score_t
129 130
 	}
130 131
 }
131 132
 
133
+score_t match(const char *needle, const char *haystack) {
134
+	if (!*needle)
135
+		return SCORE_MIN;
136
+
137
+	struct match_struct match;
138
+	setup_match_struct(&match, needle, haystack);
139
+
140
+	int n = match.needle_len;
141
+	int m = match.haystack_len;
142
+
143
+	if (m > MATCH_MAX_LEN || n > m) {
144
+		/*
145
+		 * Unreasonably large candidate: return no score
146
+		 * If it is a valid match it will still be returned, it will
147
+		 * just be ranked below any reasonably sized candidates
148
+		 */
149
+		return SCORE_MIN;
150
+	} else if (n == m) {
151
+		/* Since this method can only be called with a haystack which
152
+		 * matches needle. If the lengths of the strings are equal the
153
+		 * strings themselves must also be equal (ignoring case).
154
+		 */
155
+		return SCORE_MAX;
156
+	}
157
+
158
+	/*
159
+	 * D[][] Stores the best score for this position ending with a match.
160
+	 * M[][] Stores the best possible score at this position.
161
+	 */
162
+	score_t D[n][m], M[n][m];
163
+
164
+	score_t *last_D, *last_M;
165
+	score_t *curr_D, *curr_M;
166
+
167
+	for (int i = 0; i < n; i++) {
168
+		curr_D = &D[i][0];
169
+		curr_M = &M[i][0];
170
+
171
+		match_row(&match, i, curr_D, curr_M, last_D, last_M);
172
+
173
+		last_D = curr_D;
174
+		last_M = curr_M;
175
+	}
176
+
177
+	return M[n - 1][m - 1];
178
+}
179
+
132 180
 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
133 181
 	if (!*needle)
134 182
 		return SCORE_MIN;
... ...
@@ -214,7 +262,3 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
214 262
 
215 263
 	return M[n - 1][m - 1];
216 264
 }
217
-
218
-score_t match(const char *needle, const char *haystack) {
219
-	return match_positions(needle, haystack, NULL);
220
-}
Browse code

Extract row matching into own method

John Hawthorn authored on 28/12/2019 02:16:48
Showing 1 changed files
... ...
@@ -96,6 +96,39 @@ static void setup_match_struct(struct match_struct *match, const char *needle, c
96 96
 	precompute_bonus(haystack, match->match_bonus);
97 97
 }
98 98
 
99
+static inline void match_row(const struct match_struct *match, int row, score_t *curr_D, score_t *curr_M, const score_t *last_D, const score_t *last_M) {
100
+	int n = match->needle_len;
101
+	int m = match->haystack_len;
102
+	int i = row;
103
+
104
+	const char *lower_needle = match->lower_needle;
105
+	const char *lower_haystack = match->lower_haystack;
106
+	const score_t *match_bonus = match->match_bonus;
107
+
108
+	score_t prev_score = SCORE_MIN;
109
+	score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
110
+
111
+	for (int j = 0; j < m; j++) {
112
+		if (lower_needle[i] == lower_haystack[j]) {
113
+			score_t score = SCORE_MIN;
114
+			if (!i) {
115
+				score = (j * SCORE_GAP_LEADING) + match_bonus[j];
116
+			} else if (j) { /* i > 0 && j > 0*/
117
+				score = max(
118
+						last_M[j - 1] + match_bonus[j],
119
+
120
+						/* consecutive match, doesn't stack with match_bonus */
121
+						last_D[j - 1] + SCORE_MATCH_CONSECUTIVE);
122
+			}
123
+			curr_D[j] = score;
124
+			curr_M[j] = prev_score = max(score, prev_score + gap_score);
125
+		} else {
126
+			curr_D[j] = SCORE_MIN;
127
+			curr_M[j] = prev_score = prev_score + gap_score;
128
+		}
129
+	}
130
+}
131
+
99 132
 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
100 133
 	if (!*needle)
101 134
 		return SCORE_MIN;
... ...
@@ -124,10 +157,6 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
124 157
 		return SCORE_MAX;
125 158
 	}
126 159
 
127
-	const char *lower_needle = match.lower_needle;
128
-	const char *lower_haystack = match.lower_haystack;
129
-	const score_t *match_bonus = match.match_bonus;
130
-
131 160
 	/*
132 161
 	 * D[][] Stores the best score for this position ending with a match.
133 162
 	 * M[][] Stores the best possible score at this position.
... ...
@@ -138,31 +167,10 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
138 167
 	score_t *curr_D, *curr_M;
139 168
 
140 169
 	for (int i = 0; i < n; i++) {
141
-		score_t prev_score = SCORE_MIN;
142
-		score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
143
-
144 170
 		curr_D = &D[i][0];
145 171
 		curr_M = &M[i][0];
146 172
 
147
-		for (int j = 0; j < m; j++) {
148
-			if (lower_needle[i] == lower_haystack[j]) {
149
-				score_t score = SCORE_MIN;
150
-				if (!i) {
151
-					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
152
-				} else if (j) { /* i > 0 && j > 0*/
153
-					score = max(
154
-					    last_M[j - 1] + match_bonus[j],
155
-
156
-					    /* consecutive match, doesn't stack with match_bonus */
157
-					    last_D[j - 1] + SCORE_MATCH_CONSECUTIVE);
158
-				}
159
-				curr_D[j] = score;
160
-				curr_M[j] = prev_score = max(score, prev_score + gap_score);
161
-			} else {
162
-				curr_D[j] = SCORE_MIN;
163
-				curr_M[j] = prev_score = prev_score + gap_score;
164
-			}
165
-		}
173
+		match_row(&match, i, curr_D, curr_M, last_D, last_M);
166 174
 
167 175
 		last_D = curr_D;
168 176
 		last_M = curr_M;
Browse code

Move some temporary storage into match_struct

John Hawthorn authored on 28/12/2019 02:08:40
Showing 1 changed files
... ...
@@ -58,6 +58,16 @@ void mat_print(score_t *mat, char name, const char *needle, const char *haystack
58 58
 
59 59
 #define MATCH_MAX_LEN 1024
60 60
 
61
+struct match_struct {
62
+	int needle_len;
63
+	int haystack_len;
64
+
65
+	char lower_needle[MATCH_MAX_LEN];
66
+	char lower_haystack[MATCH_MAX_LEN];
67
+
68
+	score_t match_bonus[MATCH_MAX_LEN];
69
+};
70
+
61 71
 static void precompute_bonus(const char *haystack, score_t *match_bonus) {
62 72
 	/* Which positions are beginning of words */
63 73
 	int m = strlen(haystack);
... ...
@@ -69,12 +79,32 @@ static void precompute_bonus(const char *haystack, score_t *match_bonus) {
69 79
 	}
70 80
 }
71 81
 
82
+static void setup_match_struct(struct match_struct *match, const char *needle, const char *haystack) {
83
+	match->needle_len = strlen(needle);
84
+	match->haystack_len = strlen(haystack);
85
+
86
+	if (match->needle_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) {
87
+		return;
88
+	}
89
+
90
+	for (int i = 0; i < match->needle_len; i++)
91
+		match->lower_needle[i] = tolower(needle[i]);
92
+
93
+	for (int i = 0; i < match->haystack_len; i++)
94
+		match->lower_haystack[i] = tolower(haystack[i]);
95
+
96
+	precompute_bonus(haystack, match->match_bonus);
97
+}
98
+
72 99
 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
73 100
 	if (!*needle)
74 101
 		return SCORE_MIN;
75 102
 
76
-	int n = strlen(needle);
77
-	int m = strlen(haystack);
103
+	struct match_struct match;
104
+	setup_match_struct(&match, needle, haystack);
105
+
106
+	int n = match.needle_len;
107
+	int m = match.haystack_len;
78 108
 
79 109
 	if (m > MATCH_MAX_LEN || n > m) {
80 110
 		/*
... ...
@@ -94,26 +124,18 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
94 124
 		return SCORE_MAX;
95 125
 	}
96 126
 
97
-	char lower_needle[MATCH_MAX_LEN];
98
-	char lower_haystack[MATCH_MAX_LEN];
99
-
100
-	for (int i = 0; i < n; i++)
101
-		lower_needle[i] = tolower(needle[i]);
102
-
103
-	for (int i = 0; i < m; i++)
104
-		lower_haystack[i] = tolower(haystack[i]);
105
-
106
-	score_t match_bonus[MATCH_MAX_LEN];
107
-	score_t D[n][m], M[n][m];
108
-
109
-	score_t *last_D, *last_M;
110
-	score_t *curr_D, *curr_M;
127
+	const char *lower_needle = match.lower_needle;
128
+	const char *lower_haystack = match.lower_haystack;
129
+	const score_t *match_bonus = match.match_bonus;
111 130
 
112 131
 	/*
113 132
 	 * D[][] Stores the best score for this position ending with a match.
114 133
 	 * M[][] Stores the best possible score at this position.
115 134
 	 */
116
-	precompute_bonus(haystack, match_bonus);
135
+	score_t D[n][m], M[n][m];
136
+
137
+	score_t *last_D, *last_M;
138
+	score_t *curr_D, *curr_M;
117 139
 
118 140
 	for (int i = 0; i < n; i++) {
119 141
 		score_t prev_score = SCORE_MIN;
Browse code

Break early if strlen(neddle) > strlen(haystack)

This should never happen if this is called properly (match only returns
valid results if there IS a match), but it's nice to double check here.

John Hawthorn authored on 28/12/2019 01:54:34
Showing 1 changed files
... ...
@@ -76,7 +76,7 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
76 76
 	int n = strlen(needle);
77 77
 	int m = strlen(haystack);
78 78
 
79
-	if (m > MATCH_MAX_LEN || n > MATCH_MAX_LEN) {
79
+	if (m > MATCH_MAX_LEN || n > m) {
80 80
 		/*
81 81
 		 * Unreasonably large candidate: return no score
82 82
 		 * If it is a valid match it will still be returned, it will
Browse code

Combine early-return if as else if

John Hawthorn authored on 28/12/2019 01:49:38
Showing 1 changed files
... ...
@@ -83,9 +83,7 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
83 83
 		 * just be ranked below any reasonably sized candidates
84 84
 		 */
85 85
 		return SCORE_MIN;
86
-	}
87
-
88
-	if (n == m) {
86
+	} else if (n == m) {
89 87
 		/* Since this method can only be called with a haystack which
90 88
 		 * matches needle. If the lengths of the strings are equal the
91 89
 		 * strings themselves must also be equal (ignoring case).
Browse code

Work with row pointers

John Hawthorn authored on 28/12/2019 01:47:32
Showing 1 changed files
... ...
@@ -108,6 +108,9 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
108 108
 	score_t match_bonus[MATCH_MAX_LEN];
109 109
 	score_t D[n][m], M[n][m];
110 110
 
111
+	score_t *last_D, *last_M;
112
+	score_t *curr_D, *curr_M;
113
+
111 114
 	/*
112 115
 	 * D[][] Stores the best score for this position ending with a match.
113 116
 	 * M[][] Stores the best possible score at this position.
... ...
@@ -118,6 +121,9 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
118 121
 		score_t prev_score = SCORE_MIN;
119 122
 		score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
120 123
 
124
+		curr_D = &D[i][0];
125
+		curr_M = &M[i][0];
126
+
121 127
 		for (int j = 0; j < m; j++) {
122 128
 			if (lower_needle[i] == lower_haystack[j]) {
123 129
 				score_t score = SCORE_MIN;
... ...
@@ -125,18 +131,21 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
125 131
 					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
126 132
 				} else if (j) { /* i > 0 && j > 0*/
127 133
 					score = max(
128
-					    M[i - 1][j - 1] + match_bonus[j],
134
+					    last_M[j - 1] + match_bonus[j],
129 135
 
130 136
 					    /* consecutive match, doesn't stack with match_bonus */
131
-					    D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE);
137
+					    last_D[j - 1] + SCORE_MATCH_CONSECUTIVE);
132 138
 				}
133
-				D[i][j] = score;
134
-				M[i][j] = prev_score = max(score, prev_score + gap_score);
139
+				curr_D[j] = score;
140
+				curr_M[j] = prev_score = max(score, prev_score + gap_score);
135 141
 			} else {
136
-				D[i][j] = SCORE_MIN;
137
-				M[i][j] = prev_score = prev_score + gap_score;
142
+				curr_D[j] = SCORE_MIN;
143
+				curr_M[j] = prev_score = prev_score + gap_score;
138 144
 			}
139 145
 		}
146
+
147
+		last_D = curr_D;
148
+		last_M = curr_M;
140 149
 	}
141 150
 
142 151
 #ifdef DEBUG_VERBOSE
Browse code

Avoid VLA for match_bonus

John Hawthorn authored on 28/12/2019 01:38:22
Showing 1 changed files
... ...
@@ -105,7 +105,7 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
105 105
 	for (int i = 0; i < m; i++)
106 106
 		lower_haystack[i] = tolower(haystack[i]);
107 107
 
108
-	score_t match_bonus[m];
108
+	score_t match_bonus[MATCH_MAX_LEN];
109 109
 	score_t D[n][m], M[n][m];
110 110
 
111 111
 	/*
Browse code

Avoid VLA for lower_{needle,haystack}

John Hawthorn authored on 28/12/2019 01:33:57
Showing 1 changed files
... ...
@@ -56,6 +56,8 @@ void mat_print(score_t *mat, char name, const char *needle, const char *haystack
56 56
 }
57 57
 #endif
58 58
 
59
+#define MATCH_MAX_LEN 1024
60
+
59 61
 static void precompute_bonus(const char *haystack, score_t *match_bonus) {
60 62
 	/* Which positions are beginning of words */
61 63
 	int m = strlen(haystack);
... ...
@@ -74,6 +76,15 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
74 76
 	int n = strlen(needle);
75 77
 	int m = strlen(haystack);
76 78
 
79
+	if (m > MATCH_MAX_LEN || n > MATCH_MAX_LEN) {
80
+		/*
81
+		 * Unreasonably large candidate: return no score
82
+		 * If it is a valid match it will still be returned, it will
83
+		 * just be ranked below any reasonably sized candidates
84
+		 */
85
+		return SCORE_MIN;
86
+	}
87
+
77 88
 	if (n == m) {
78 89
 		/* Since this method can only be called with a haystack which
79 90
 		 * matches needle. If the lengths of the strings are equal the
... ...
@@ -85,17 +96,8 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
85 96
 		return SCORE_MAX;
86 97
 	}
87 98
 
88
-	if (m > 1024) {
89
-		/*
90
-		 * Unreasonably large candidate: return no score
91
-		 * If it is a valid match it will still be returned, it will
92
-		 * just be ranked below any reasonably sized candidates
93
-		 */
94
-		return SCORE_MIN;
95
-	}
96
-
97
-	char lower_needle[n];
98
-	char lower_haystack[m];
99
+	char lower_needle[MATCH_MAX_LEN];
100
+	char lower_haystack[MATCH_MAX_LEN];
99 101
 
100 102
 	for (int i = 0; i < n; i++)
101 103
 		lower_needle[i] = tolower(needle[i]);
Browse code

Precompute tolower in match_positions

This makes match_positions significantly faster on MacOS by calling
tolower() O(N) times instead of O(N*N)

Before:

$ time ./fzy -e linux --benchmark < linux_files.txt
./fzy -e linux --benchmark < linux_files.txt 13.24s user 0.03s system 381% cpu 3.483 total

After:

$ time ./fzy -e linux --benchmark < linux_files.txt
./fzy -e linux --benchmark < linux_files.txt 4.57s user 0.02s system 381% cpu 1.204 total

John Hawthorn authored on 13/10/2018 21:05:10
Showing 1 changed files
... ...
@@ -94,6 +94,15 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
94 94
 		return SCORE_MIN;
95 95
 	}
96 96
 
97
+	char lower_needle[n];
98
+	char lower_haystack[m];
99
+
100
+	for (int i = 0; i < n; i++)
101
+		lower_needle[i] = tolower(needle[i]);
102
+
103
+	for (int i = 0; i < m; i++)
104
+		lower_haystack[i] = tolower(haystack[i]);
105
+
97 106
 	score_t match_bonus[m];
98 107
 	score_t D[n][m], M[n][m];
99 108
 
... ...
@@ -108,7 +117,7 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
108 117
 		score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
109 118
 
110 119
 		for (int j = 0; j < m; j++) {
111
-			if (tolower(needle[i]) == tolower(haystack[j])) {
120
+			if (lower_needle[i] == lower_haystack[j]) {
112 121
 				score_t score = SCORE_MIN;
113 122
 				if (!i) {
114 123
 					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
Browse code

Use standards-compliant lookup table

John Hawthorn authored on 10/07/2016 19:42:06
Showing 1 changed files
... ...
@@ -6,6 +6,7 @@
6 6
 #include <math.h>
7 7
 
8 8
 #include "match.h"
9
+#include "bonus.h"
9 10
 
10 11
 #include "../config.h"
11 12
 
... ...
@@ -55,38 +56,13 @@ void mat_print(score_t *mat, char name, const char *needle, const char *haystack
55 56
 }
56 57
 #endif
57 58
 
58
-const score_t bonus_states[][256] = {
59
-	{ 0 },
60
-	{
61
-		['/'] = SCORE_MATCH_SLASH,
62
-		['-'] = SCORE_MATCH_WORD,
63
-		['_'] = SCORE_MATCH_WORD,
64
-		[' '] = SCORE_MATCH_WORD,
65
-		['.'] = SCORE_MATCH_DOT,
66
-	},
67
-	{
68
-		['a' ... 'z'] = SCORE_MATCH_CAPITAL,
69
-		['/'] = SCORE_MATCH_SLASH,
70
-		['-'] = SCORE_MATCH_WORD,
71
-		['_'] = SCORE_MATCH_WORD,
72
-		[' '] = SCORE_MATCH_WORD,
73
-		['.'] = SCORE_MATCH_DOT,
74
-	},
75
-};
76
-
77
-const size_t bonus_index[256] = {
78
-	['A' ... 'Z'] = 2,
79
-	['a' ... 'z'] = 1,
80
-	['0' ... '9'] = 1,
81
-};
82
-
83 59
 static void precompute_bonus(const char *haystack, score_t *match_bonus) {
84 60
 	/* Which positions are beginning of words */
85 61
 	int m = strlen(haystack);
86 62
 	char last_ch = '/';
87 63
 	for (int i = 0; i < m; i++) {
88 64
 		char ch = haystack[i];
89
-		match_bonus[i] = bonus_states[bonus_index[(size_t)ch]][(size_t)last_ch];
65
+		match_bonus[i] = COMPUTE_BONUS(last_ch, ch);
90 66
 		last_ch = ch;
91 67
 	}
92 68
 }
Browse code

Use a lookup table for precompute_bonuses

John Hawthorn authored on 10/07/2016 19:46:59
Showing 1 changed files
... ...
@@ -55,28 +55,38 @@ void mat_print(score_t *mat, char name, const char *needle, const char *haystack
55 55
 }
56 56
 #endif
57 57
 
58
+const score_t bonus_states[][256] = {
59
+	{ 0 },
60
+	{
61
+		['/'] = SCORE_MATCH_SLASH,
62
+		['-'] = SCORE_MATCH_WORD,
63
+		['_'] = SCORE_MATCH_WORD,
64
+		[' '] = SCORE_MATCH_WORD,
65
+		['.'] = SCORE_MATCH_DOT,
66
+	},
67
+	{
68
+		['a' ... 'z'] = SCORE_MATCH_CAPITAL,
69
+		['/'] = SCORE_MATCH_SLASH,
70
+		['-'] = SCORE_MATCH_WORD,
71
+		['_'] = SCORE_MATCH_WORD,
72
+		[' '] = SCORE_MATCH_WORD,
73
+		['.'] = SCORE_MATCH_DOT,
74
+	},
75
+};
76
+
77
+const size_t bonus_index[256] = {
78
+	['A' ... 'Z'] = 2,
79
+	['a' ... 'z'] = 1,
80
+	['0' ... '9'] = 1,
81
+};
82
+
58 83
 static void precompute_bonus(const char *haystack, score_t *match_bonus) {
59 84
 	/* Which positions are beginning of words */
60 85
 	int m = strlen(haystack);
61
-	char last_ch = '\0';
86
+	char last_ch = '/';
62 87
 	for (int i = 0; i < m; i++) {
63 88
 		char ch = haystack[i];
64
-
65
-		score_t score = 0;
66
-		if (isalnum(ch)) {
67
-			if (!last_ch || last_ch == '/') {
68
-				score = SCORE_MATCH_SLASH;
69
-			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ') {
70
-				score = SCORE_MATCH_WORD;
71
-			} else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
72
-				/* CamelCase */
73
-				score = SCORE_MATCH_CAPITAL;
74
-			} else if (last_ch == '.') {
75
-				score = SCORE_MATCH_DOT;
76
-			}
77
-		}
78
-
79
-		match_bonus[i] = score;
89
+		match_bonus[i] = bonus_states[bonus_index[(size_t)ch]][(size_t)last_ch];
80 90
 		last_ch = ch;
81 91
 	}
82 92
 }
Browse code

Don't consider numbers word separators

This made sense on paper when deciding what was a "word". However in
reality this is rarely an indication of a separate word. I've found that
this caused hexadecimal or base64 strings to be favoured in matches.

John Hawthorn authored on 27/06/2016 06:10:25
Showing 1 changed files
... ...
@@ -66,8 +66,7 @@ static void precompute_bonus(const char *haystack, score_t *match_bonus) {
66 66
 		if (isalnum(ch)) {
67 67
 			if (!last_ch || last_ch == '/') {
68 68
 				score = SCORE_MATCH_SLASH;
69
-			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' ||
70
-				   (last_ch >= '0' && last_ch <= '9')) {
69
+			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ') {
71 70
 				score = SCORE_MATCH_WORD;
72 71
 			} else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
73 72
 				/* CamelCase */
Browse code

Split bonus computation into own method

John Hawthorn authored on 27/06/2016 06:10:00
Showing 1 changed files
... ...
@@ -55,6 +55,33 @@ void mat_print(score_t *mat, char name, const char *needle, const char *haystack
55 55
 }
56 56
 #endif
57 57
 
58
+static void precompute_bonus(const char *haystack, score_t *match_bonus) {
59
+	/* Which positions are beginning of words */
60
+	int m = strlen(haystack);
61
+	char last_ch = '\0';
62
+	for (int i = 0; i < m; i++) {
63
+		char ch = haystack[i];
64
+
65
+		score_t score = 0;
66
+		if (isalnum(ch)) {
67
+			if (!last_ch || last_ch == '/') {
68
+				score = SCORE_MATCH_SLASH;
69
+			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' ||
70
+				   (last_ch >= '0' && last_ch <= '9')) {
71
+				score = SCORE_MATCH_WORD;
72
+			} else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
73
+				/* CamelCase */
74
+				score = SCORE_MATCH_CAPITAL;
75
+			} else if (last_ch == '.') {
76
+				score = SCORE_MATCH_DOT;
77
+			}
78
+		}
79
+
80
+		match_bonus[i] = score;
81
+		last_ch = ch;
82
+	}
83
+}
84
+
58 85
 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
59 86
 	if (!*needle)
60 87
 		return SCORE_MIN;
... ...
@@ -89,30 +116,7 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
89 116
 	 * D[][] Stores the best score for this position ending with a match.
90 117
 	 * M[][] Stores the best possible score at this position.
91 118
 	 */
92
-
93
-	/* Which positions are beginning of words */
94
-	char last_ch = '\0';
95
-	for (int i = 0; i < m; i++) {
96
-		char ch = haystack[i];
97
-
98
-		score_t score = 0;
99
-		if (isalnum(ch)) {
100
-			if (!last_ch || last_ch == '/') {
101
-				score = SCORE_MATCH_SLASH;
102
-			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' ||
103
-				   (last_ch >= '0' && last_ch <= '9')) {
104
-				score = SCORE_MATCH_WORD;
105
-			} else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
106
-				/* CamelCase */
107
-				score = SCORE_MATCH_CAPITAL;
108
-			} else if (last_ch == '.') {
109
-				score = SCORE_MATCH_DOT;
110
-			}
111
-		}
112
-
113
-		match_bonus[i] = score;
114
-		last_ch = ch;
115
-	}
119
+	precompute_bonus(haystack, match_bonus);
116 120
 
117 121
 	for (int i = 0; i < n; i++) {
118 122
 		score_t prev_score = SCORE_MIN;
Browse code

Change match into if/else

This is marginally faster and I still think it reads very well (maybe
better).

John Hawthorn authored on 14/06/2016 06:55:58
Showing 1 changed files
... ...
@@ -117,9 +117,10 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
117 117
 	for (int i = 0; i < n; i++) {
118 118
 		score_t prev_score = SCORE_MIN;
119 119
 		score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
120
+
120 121
 		for (int j = 0; j < m; j++) {
121
-			score_t score = SCORE_MIN;
122 122
 			if (tolower(needle[i]) == tolower(haystack[j])) {
123
+				score_t score = SCORE_MIN;
123 124
 				if (!i) {
124 125
 					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
125 126
 				} else if (j) { /* i > 0 && j > 0*/
... ...
@@ -129,9 +130,12 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
129 130
 					    /* consecutive match, doesn't stack with match_bonus */
130 131
 					    D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE);
131 132
 				}
133
+				D[i][j] = score;
134
+				M[i][j] = prev_score = max(score, prev_score + gap_score);
135
+			} else {
136
+				D[i][j] = SCORE_MIN;
137
+				M[i][j] = prev_score = prev_score + gap_score;
132 138
 			}
133
-			D[i][j] = score;
134
-			M[i][j] = prev_score = max(score, prev_score + gap_score);
135 139
 		}
136 140
 	}
137 141
 
Browse code

Remove calculate_score (same as match_positions)

John Hawthorn authored on 09/06/2016 00:37:38
Showing 1 changed files
... ...
@@ -55,7 +55,7 @@ void mat_print(score_t *mat, char name, const char *needle, const char *haystack
55 55
 }
56 56
 #endif
57 57
 
58
-score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
58
+score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
59 59
 	if (!*needle)
60 60
 		return SCORE_MIN;
61 61
 
... ...
@@ -174,10 +174,6 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
174 174
 	return M[n - 1][m - 1];
175 175
 }
176 176
 
177
-score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
178
-	return calculate_score(needle, haystack, positions);
179
-}
180
-
181 177
 score_t match(const char *needle, const char *haystack) {
182 178
 	return match_positions(needle, haystack, NULL);
183 179
 }
Browse code

Move equality detection within calculate_score

John Hawthorn authored on 08/06/2016 06:28:31
Showing 1 changed files
... ...
@@ -56,12 +56,23 @@ void mat_print(score_t *mat, char name, const char *needle, const char *haystack
56 56
 #endif
57 57
 
58 58
 score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
59
-	if (!*haystack || !*needle)
59
+	if (!*needle)
60 60
 		return SCORE_MIN;
61 61
 
62 62
 	int n = strlen(needle);
63 63
 	int m = strlen(haystack);
64 64
 
65
+	if (n == m) {
66
+		/* Since this method can only be called with a haystack which
67
+		 * matches needle. If the lengths of the strings are equal the
68
+		 * strings themselves must also be equal (ignoring case).
69
+		 */
70
+		if (positions)
71
+			for (int i = 0; i < n; i++)
72
+				positions[i] = i;
73
+		return SCORE_MAX;
74
+	}
75
+
65 76
 	if (m > 1024) {
66 77
 		/*
67 78
 		 * Unreasonably large candidate: return no score
... ...
@@ -164,18 +175,7 @@ score_t calculate_score(const char *needle, const char *haystack, size_t *positi
164 175
 }
165 176
 
166 177
 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
167
-	if (!*needle) {
168
-		return SCORE_MAX;
169
-	} else if (!strcasecmp(needle, haystack)) {
170
-		if (positions) {
171
-			int n = strlen(needle);
172
-			for (int i = 0; i < n; i++)
173
-				positions[i] = i;
174
-		}
175
-		return SCORE_MAX;
176
-	} else {
177
-		return calculate_score(needle, haystack, positions);
178
-	}
178
+	return calculate_score(needle, haystack, positions);
179 179
 }
180 180
 
181 181
 score_t match(const char *needle, const char *haystack) {
Browse code

Move sources into src directory

John Hawthorn authored on 21/05/2016 21:56:03
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,183 @@
1
+#include <ctype.h>
2
+#include <string.h>
3
+#include <strings.h>
4
+#include <stdio.h>
5
+#include <float.h>
6
+#include <math.h>
7
+
8
+#include "match.h"
9
+
10
+#include "../config.h"
11
+
12
+char *strcasechr(const char *s, char c) {
13
+	const char accept[3] = {c, toupper(c), 0};
14
+	return strpbrk(s, accept);
15
+}
16
+
17
+int has_match(const char *needle, const char *haystack) {
18
+	while (*needle) {
19
+		char nch = *needle++;
20
+
21
+		if (!(haystack = strcasechr(haystack, nch))) {
22
+			return 0;
23
+		}
24
+		haystack++;
25
+	}
26
+	return 1;
27
+}
28
+
29
+#define max(a, b) (((a) > (b)) ? (a) : (b))
30
+
31
+#ifdef DEBUG_VERBOSE
32
+/* print one of the internal matrices */
33
+void mat_print(score_t *mat, char name, const char *needle, const char *haystack) {
34
+	int n = strlen(needle);
35
+	int m = strlen(haystack);
36
+	int i, j;
37
+	fprintf(stderr, "%c   ", name);
38
+	for (j = 0; j < m; j++) {
39
+		fprintf(stderr, "     %c", haystack[j]);
40
+	}
41
+	fprintf(stderr, "\n");
42
+	for (i = 0; i < n; i++) {
43
+		fprintf(stderr, " %c |", needle[i]);
44
+		for (j = 0; j < m; j++) {
45
+			score_t val = mat[i * m + j];
46
+			if (val == SCORE_MIN) {
47
+				fprintf(stderr, "    -\u221E");
48
+			} else {
49
+				fprintf(stderr, " %.3f", val);
50
+			}
51
+		}
52
+		fprintf(stderr, "\n");
53
+	}
54
+	fprintf(stderr, "\n\n");
55
+}
56
+#endif
57
+
58
+score_t calculate_score(const char *needle, const char *haystack, size_t *positions) {
59
+	if (!*haystack || !*needle)
60
+		return SCORE_MIN;
61
+
62
+	int n = strlen(needle);
63
+	int m = strlen(haystack);
64
+
65
+	if (m > 1024) {
66
+		/*
67
+		 * Unreasonably large candidate: return no score
68
+		 * If it is a valid match it will still be returned, it will
69
+		 * just be ranked below any reasonably sized candidates
70
+		 */
71
+		return SCORE_MIN;
72
+	}
73
+
74
+	score_t match_bonus[m];
75
+	score_t D[n][m], M[n][m];
76
+
77
+	/*
78
+	 * D[][] Stores the best score for this position ending with a match.
79
+	 * M[][] Stores the best possible score at this position.
80
+	 */
81
+
82
+	/* Which positions are beginning of words */
83
+	char last_ch = '\0';
84
+	for (int i = 0; i < m; i++) {
85
+		char ch = haystack[i];
86
+
87
+		score_t score = 0;
88
+		if (isalnum(ch)) {
89
+			if (!last_ch || last_ch == '/') {
90
+				score = SCORE_MATCH_SLASH;
91
+			} else if (last_ch == '-' || last_ch == '_' || last_ch == ' ' ||
92
+				   (last_ch >= '0' && last_ch <= '9')) {
93
+				score = SCORE_MATCH_WORD;
94
+			} else if (last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') {
95
+				/* CamelCase */
96
+				score = SCORE_MATCH_CAPITAL;
97
+			} else if (last_ch == '.') {
98
+				score = SCORE_MATCH_DOT;
99
+			}
100
+		}
101
+
102
+		match_bonus[i] = score;
103
+		last_ch = ch;
104
+	}
105
+
106
+	for (int i = 0; i < n; i++) {
107
+		score_t prev_score = SCORE_MIN;
108
+		score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
109
+		for (int j = 0; j < m; j++) {
110
+			score_t score = SCORE_MIN;
111
+			if (tolower(needle[i]) == tolower(haystack[j])) {
112
+				if (!i) {
113
+					score = (j * SCORE_GAP_LEADING) + match_bonus[j];
114
+				} else if (j) { /* i > 0 && j > 0*/
115
+					score = max(
116
+					    M[i - 1][j - 1] + match_bonus[j],
117
+
118
+					    /* consecutive match, doesn't stack with match_bonus */
119
+					    D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE);
120
+				}
121
+			}
122
+			D[i][j] = score;
123
+			M[i][j] = prev_score = max(score, prev_score + gap_score);
124
+		}
125
+	}
126
+
127
+#ifdef DEBUG_VERBOSE
128
+	fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack);
129
+	mat_print(&D[0][0], 'D', needle, haystack);
130
+	mat_print(&M[0][0], 'M', needle, haystack);
131
+	fprintf(stderr, "\n");
132
+#endif
133
+
134
+	/* backtrace to find the positions of optimal matching */
135
+	if (positions) {
136
+		int match_required = 0;
137
+		for (int i = n - 1, j = m - 1; i >= 0; i--) {
138
+			for (; j >= 0; j--) {
139
+				/*
140
+				 * There may be multiple paths which result in
141
+				 * the optimal weight.
142
+				 *
143
+				 * For simplicity, we will pick the first one
144
+				 * we encounter, the latest in the candidate
145
+				 * string.
146
+				 */
147
+				if (D[i][j] != SCORE_MIN &&
148
+				    (match_required || D[i][j] == M[i][j])) {
149
+					/* If this score was determined using
150
+					 * SCORE_MATCH_CONSECUTIVE, the
151
+					 * previous character MUST be a match
152
+					 */
153
+					match_required =
154
+					    i && j &&
155
+					    M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
156
+					positions[i] = j--;
157
+					break;
158
+				}
159
+			}
160
+		}
161
+	}
162
+
163
+	return M[n - 1][m - 1];
164
+}
165
+
166
+score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
167
+	if (!*needle) {
168
+		return SCORE_MAX;
169
+	} else if (!strcasecmp(needle, haystack)) {
170
+		if (positions) {
171
+			int n = strlen(needle);
172
+			for (int i = 0; i < n; i++)
173
+				positions[i] = i;
174
+		}
175
+		return SCORE_MAX;
176
+	} else {
177
+		return calculate_score(needle, haystack, positions);
178
+	}
179
+}
180
+
181
+score_t match(const char *needle, const char *haystack) {
182
+	return match_positions(needle, haystack, NULL);
183
+}