sifter.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /**
  2. * sifter.js
  3. * Copyright (c) 2013 Brian Reavis & contributors
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
  6. * file except in compliance with the License. You may obtain a copy of the License at:
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software distributed under
  10. * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  11. * ANY KIND, either express or implied. See the License for the specific language
  12. * governing permissions and limitations under the License.
  13. *
  14. * @author Brian Reavis <brian@thirdroute.com>
  15. */
  16. (function(root, factory) {
  17. if (typeof define === 'function' && define.amd) {
  18. define(factory);
  19. } else if (typeof exports === 'object') {
  20. module.exports = factory();
  21. } else {
  22. root.Sifter = factory();
  23. }
  24. }(this, function() {
  25. /**
  26. * Textually searches arrays and hashes of objects
  27. * by property (or multiple properties). Designed
  28. * specifically for autocomplete.
  29. *
  30. * @constructor
  31. * @param {array|object} items
  32. * @param {object} items
  33. */
  34. var Sifter = function(items, settings) {
  35. this.items = items;
  36. this.settings = settings || {diacritics: true};
  37. };
  38. /**
  39. * Splits a search string into an array of individual
  40. * regexps to be used to match results.
  41. *
  42. * @param {string} query
  43. * @returns {array}
  44. */
  45. Sifter.prototype.tokenize = function(query) {
  46. query = trim(String(query || '').toLowerCase());
  47. if (!query || !query.length) return [];
  48. var i, n, regex, letter;
  49. var tokens = [];
  50. var words = query.split(/ +/);
  51. for (i = 0, n = words.length; i < n; i++) {
  52. regex = escape_regex(words[i]);
  53. if (this.settings.diacritics) {
  54. for (letter in DIACRITICS) {
  55. if (DIACRITICS.hasOwnProperty(letter)) {
  56. regex = regex.replace(new RegExp(letter, 'g'), DIACRITICS[letter]);
  57. }
  58. }
  59. }
  60. tokens.push({
  61. string : words[i],
  62. regex : new RegExp(regex, 'i')
  63. });
  64. }
  65. return tokens;
  66. };
  67. /**
  68. * Iterates over arrays and hashes.
  69. *
  70. * ```
  71. * this.iterator(this.items, function(item, id) {
  72. * // invoked for each item
  73. * });
  74. * ```
  75. *
  76. * @param {array|object} object
  77. */
  78. Sifter.prototype.iterator = function(object, callback) {
  79. var iterator;
  80. if (is_array(object)) {
  81. iterator = Array.prototype.forEach || function(callback) {
  82. for (var i = 0, n = this.length; i < n; i++) {
  83. callback(this[i], i, this);
  84. }
  85. };
  86. } else {
  87. iterator = function(callback) {
  88. for (var key in this) {
  89. if (this.hasOwnProperty(key)) {
  90. callback(this[key], key, this);
  91. }
  92. }
  93. };
  94. }
  95. iterator.apply(object, [callback]);
  96. };
  97. /**
  98. * Returns a function to be used to score individual results.
  99. *
  100. * Good matches will have a higher score than poor matches.
  101. * If an item is not a match, 0 will be returned by the function.
  102. *
  103. * @param {object|string} search
  104. * @param {object} options (optional)
  105. * @returns {function}
  106. */
  107. Sifter.prototype.getScoreFunction = function(search, options) {
  108. var self, fields, tokens, token_count;
  109. self = this;
  110. search = self.prepareSearch(search, options);
  111. tokens = search.tokens;
  112. fields = search.options.fields;
  113. token_count = tokens.length;
  114. /**
  115. * Calculates how close of a match the
  116. * given value is against a search token.
  117. *
  118. * @param {mixed} value
  119. * @param {object} token
  120. * @return {number}
  121. */
  122. var scoreValue = function(value, token) {
  123. var score, pos;
  124. if (!value) return 0;
  125. value = String(value || '');
  126. pos = value.search(token.regex);
  127. if (pos === -1) return 0;
  128. score = token.string.length / value.length;
  129. if (pos === 0) score += 0.5;
  130. return score;
  131. };
  132. /**
  133. * Calculates the score of an object
  134. * against the search query.
  135. *
  136. * @param {object} token
  137. * @param {object} data
  138. * @return {number}
  139. */
  140. var scoreObject = (function() {
  141. var field_count = fields.length;
  142. if (!field_count) {
  143. return function() { return 0; };
  144. }
  145. if (field_count === 1) {
  146. return function(token, data) {
  147. return scoreValue(data[fields[0]], token);
  148. };
  149. }
  150. return function(token, data) {
  151. for (var i = 0, sum = 0; i < field_count; i++) {
  152. sum += scoreValue(data[fields[i]], token);
  153. }
  154. return sum / field_count;
  155. };
  156. })();
  157. if (!token_count) {
  158. return function() { return 0; };
  159. }
  160. if (token_count === 1) {
  161. return function(data) {
  162. return scoreObject(tokens[0], data);
  163. };
  164. }
  165. if (search.options.conjunction === 'and') {
  166. return function(data) {
  167. var score;
  168. for (var i = 0, sum = 0; i < token_count; i++) {
  169. score = scoreObject(tokens[i], data);
  170. if (score <= 0) return 0;
  171. sum += score;
  172. }
  173. return sum / token_count;
  174. };
  175. } else {
  176. return function(data) {
  177. for (var i = 0, sum = 0; i < token_count; i++) {
  178. sum += scoreObject(tokens[i], data);
  179. }
  180. return sum / token_count;
  181. };
  182. }
  183. };
  184. /**
  185. * Returns a function that can be used to compare two
  186. * results, for sorting purposes. If no sorting should
  187. * be performed, `null` will be returned.
  188. *
  189. * @param {string|object} search
  190. * @param {object} options
  191. * @return function(a,b)
  192. */
  193. Sifter.prototype.getSortFunction = function(search, options) {
  194. var i, n, self, field, fields, fields_count, multiplier, multipliers, get_field, implicit_score, sort;
  195. self = this;
  196. search = self.prepareSearch(search, options);
  197. sort = (!search.query && options.sort_empty) || options.sort;
  198. /**
  199. * Fetches the specified sort field value
  200. * from a search result item.
  201. *
  202. * @param {string} name
  203. * @param {object} result
  204. * @return {mixed}
  205. */
  206. get_field = function(name, result) {
  207. if (name === '$score') return result.score;
  208. return self.items[result.id][name];
  209. };
  210. // parse options
  211. fields = [];
  212. if (sort) {
  213. for (i = 0, n = sort.length; i < n; i++) {
  214. if (search.query || sort[i].field !== '$score') {
  215. fields.push(sort[i]);
  216. }
  217. }
  218. }
  219. // the "$score" field is implied to be the primary
  220. // sort field, unless it's manually specified
  221. if (search.query) {
  222. implicit_score = true;
  223. for (i = 0, n = fields.length; i < n; i++) {
  224. if (fields[i].field === '$score') {
  225. implicit_score = false;
  226. break;
  227. }
  228. }
  229. if (implicit_score) {
  230. fields.unshift({field: '$score', direction: 'desc'});
  231. }
  232. } else {
  233. for (i = 0, n = fields.length; i < n; i++) {
  234. if (fields[i].field === '$score') {
  235. fields.splice(i, 1);
  236. break;
  237. }
  238. }
  239. }
  240. multipliers = [];
  241. for (i = 0, n = fields.length; i < n; i++) {
  242. multipliers.push(fields[i].direction === 'desc' ? -1 : 1);
  243. }
  244. // build function
  245. fields_count = fields.length;
  246. if (!fields_count) {
  247. return null;
  248. } else if (fields_count === 1) {
  249. field = fields[0].field;
  250. multiplier = multipliers[0];
  251. return function(a, b) {
  252. return multiplier * cmp(
  253. get_field(field, a),
  254. get_field(field, b)
  255. );
  256. };
  257. } else {
  258. return function(a, b) {
  259. var i, result, a_value, b_value, field;
  260. for (i = 0; i < fields_count; i++) {
  261. field = fields[i].field;
  262. result = multipliers[i] * cmp(
  263. get_field(field, a),
  264. get_field(field, b)
  265. );
  266. if (result) return result;
  267. }
  268. return 0;
  269. };
  270. }
  271. };
  272. /**
  273. * Parses a search query and returns an object
  274. * with tokens and fields ready to be populated
  275. * with results.
  276. *
  277. * @param {string} query
  278. * @param {object} options
  279. * @returns {object}
  280. */
  281. Sifter.prototype.prepareSearch = function(query, options) {
  282. if (typeof query === 'object') return query;
  283. options = extend({}, options);
  284. var option_fields = options.fields;
  285. var option_sort = options.sort;
  286. var option_sort_empty = options.sort_empty;
  287. if (option_fields && !is_array(option_fields)) options.fields = [option_fields];
  288. if (option_sort && !is_array(option_sort)) options.sort = [option_sort];
  289. if (option_sort_empty && !is_array(option_sort_empty)) options.sort_empty = [option_sort_empty];
  290. return {
  291. options : options,
  292. query : String(query || '').toLowerCase(),
  293. tokens : this.tokenize(query),
  294. total : 0,
  295. items : []
  296. };
  297. };
  298. /**
  299. * Searches through all items and returns a sorted array of matches.
  300. *
  301. * The `options` parameter can contain:
  302. *
  303. * - fields {string|array}
  304. * - sort {array}
  305. * - score {function}
  306. * - filter {bool}
  307. * - limit {integer}
  308. *
  309. * Returns an object containing:
  310. *
  311. * - options {object}
  312. * - query {string}
  313. * - tokens {array}
  314. * - total {int}
  315. * - items {array}
  316. *
  317. * @param {string} query
  318. * @param {object} options
  319. * @returns {object}
  320. */
  321. Sifter.prototype.search = function(query, options) {
  322. var self = this, value, score, search, calculateScore;
  323. var fn_sort;
  324. var fn_score;
  325. search = this.prepareSearch(query, options);
  326. options = search.options;
  327. query = search.query;
  328. // generate result scoring function
  329. fn_score = options.score || self.getScoreFunction(search);
  330. // perform search and sort
  331. if (query.length) {
  332. self.iterator(self.items, function(item, id) {
  333. score = fn_score(item);
  334. if (options.filter === false || score > 0) {
  335. search.items.push({'score': score, 'id': id});
  336. }
  337. });
  338. } else {
  339. self.iterator(self.items, function(item, id) {
  340. search.items.push({'score': 1, 'id': id});
  341. });
  342. }
  343. fn_sort = self.getSortFunction(search, options);
  344. if (fn_sort) search.items.sort(fn_sort);
  345. // apply limits
  346. search.total = search.items.length;
  347. if (typeof options.limit === 'number') {
  348. search.items = search.items.slice(0, options.limit);
  349. }
  350. return search;
  351. };
  352. // utilities
  353. // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  354. var cmp = function(a, b) {
  355. if (typeof a === 'number' && typeof b === 'number') {
  356. return a > b ? 1 : (a < b ? -1 : 0);
  357. }
  358. a = asciifold(String(a || ''));
  359. b = asciifold(String(b || ''));
  360. if (a > b) return 1;
  361. if (b > a) return -1;
  362. return 0;
  363. };
  364. var extend = function(a, b) {
  365. var i, n, k, object;
  366. for (i = 1, n = arguments.length; i < n; i++) {
  367. object = arguments[i];
  368. if (!object) continue;
  369. for (k in object) {
  370. if (object.hasOwnProperty(k)) {
  371. a[k] = object[k];
  372. }
  373. }
  374. }
  375. return a;
  376. };
  377. var trim = function(str) {
  378. return (str + '').replace(/^\s+|\s+$|/g, '');
  379. };
  380. var escape_regex = function(str) {
  381. return (str + '').replace(/([.?*+^$[\]\\(){}|-])/g, '\\$1');
  382. };
  383. var is_array = Array.isArray || ($ && $.isArray) || function(object) {
  384. return Object.prototype.toString.call(object) === '[object Array]';
  385. };
  386. var DIACRITICS = {
  387. 'a': '[aÀÁÂÃÄÅàáâãäåĀāąĄ]',
  388. 'c': '[cÇçćĆčČ]',
  389. 'd': '[dđĐďĎ]',
  390. 'e': '[eÈÉÊËèéêëěĚĒēęĘ]',
  391. 'i': '[iÌÍÎÏìíîïĪī]',
  392. 'l': '[lłŁ]',
  393. 'n': '[nÑñňŇńŃ]',
  394. 'o': '[oÒÓÔÕÕÖØòóôõöøŌō]',
  395. 'r': '[rřŘ]',
  396. 's': '[sŠšśŚ]',
  397. 't': '[tťŤ]',
  398. 'u': '[uÙÚÛÜùúûüůŮŪū]',
  399. 'y': '[yŸÿýÝ]',
  400. 'z': '[zŽžżŻźŹ]'
  401. };
  402. var asciifold = (function() {
  403. var i, n, k, chunk;
  404. var foreignletters = '';
  405. var lookup = {};
  406. for (k in DIACRITICS) {
  407. if (DIACRITICS.hasOwnProperty(k)) {
  408. chunk = DIACRITICS[k].substring(2, DIACRITICS[k].length - 1);
  409. foreignletters += chunk;
  410. for (i = 0, n = chunk.length; i < n; i++) {
  411. lookup[chunk.charAt(i)] = k;
  412. }
  413. }
  414. }
  415. var regexp = new RegExp('[' + foreignletters + ']', 'g');
  416. return function(str) {
  417. return str.replace(regexp, function(foreignletter) {
  418. return lookup[foreignletter];
  419. }).toLowerCase();
  420. };
  421. })();
  422. // export
  423. // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  424. return Sifter;
  425. }));