fts5_vocab.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809
  1. /*
  2. ** 2015 May 08
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This is an SQLite virtual table module implementing direct access to an
  14. ** existing FTS5 index. The module may create several different types of
  15. ** tables:
  16. **
  17. ** col:
  18. ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
  19. **
  20. ** One row for each term/column combination. The value of $doc is set to
  21. ** the number of fts5 rows that contain at least one instance of term
  22. ** $term within column $col. Field $cnt is set to the total number of
  23. ** instances of term $term in column $col (in any row of the fts5 table).
  24. **
  25. ** row:
  26. ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
  27. **
  28. ** One row for each term in the database. The value of $doc is set to
  29. ** the number of fts5 rows that contain at least one instance of term
  30. ** $term. Field $cnt is set to the total number of instances of term
  31. ** $term in the database.
  32. **
  33. ** instance:
  34. ** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>));
  35. **
  36. ** One row for each term instance in the database.
  37. */
  38. #include "fts5Int.h"
  39. typedef struct Fts5VocabTable Fts5VocabTable;
  40. typedef struct Fts5VocabCursor Fts5VocabCursor;
  41. struct Fts5VocabTable {
  42. sqlite3_vtab base;
  43. char *zFts5Tbl; /* Name of fts5 table */
  44. char *zFts5Db; /* Db containing fts5 table */
  45. sqlite3 *db; /* Database handle */
  46. Fts5Global *pGlobal; /* FTS5 global object for this database */
  47. int eType; /* FTS5_VOCAB_COL, ROW or INSTANCE */
  48. unsigned bBusy; /* True if busy */
  49. };
  50. struct Fts5VocabCursor {
  51. sqlite3_vtab_cursor base;
  52. sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */
  53. Fts5Table *pFts5; /* Associated FTS5 table */
  54. int bEof; /* True if this cursor is at EOF */
  55. Fts5IndexIter *pIter; /* Term/rowid iterator object */
  56. void *pStruct; /* From sqlite3Fts5StructureRef() */
  57. int nLeTerm; /* Size of zLeTerm in bytes */
  58. char *zLeTerm; /* (term <= $zLeTerm) paramater, or NULL */
  59. int colUsed; /* Copy of sqlite3_index_info.colUsed */
  60. /* These are used by 'col' tables only */
  61. int iCol;
  62. i64 *aCnt;
  63. i64 *aDoc;
  64. /* Output values used by all tables. */
  65. i64 rowid; /* This table's current rowid value */
  66. Fts5Buffer term; /* Current value of 'term' column */
  67. /* Output values Used by 'instance' tables only */
  68. i64 iInstPos;
  69. int iInstOff;
  70. };
  /*
  ** Table types, stored in Fts5VocabTable.eType. The values double as
  ** indexes into the azSchema[] array in fts5VocabInitVtab().
  */
  #define FTS5_VOCAB_COL      0
  #define FTS5_VOCAB_ROW      1
  #define FTS5_VOCAB_INSTANCE 2

  /*
  ** Column lists declared for each of the three table types.
  */
  #define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
  #define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
  #define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"

  /*
  ** Layout of the idxNum value passed from xBestIndex to xFilter. The
  ** low eight bits hold a copy of sqlite3_index_info.colUsed. The bits
  ** above record which "term" constraints are supplied as arguments.
  */
  #define FTS5_VOCAB_TERM_EQ      0x0100
  #define FTS5_VOCAB_TERM_GE      0x0200
  #define FTS5_VOCAB_TERM_LE      0x0400
  #define FTS5_VOCAB_COLUSED_MASK 0xFF
  84. /*
  85. ** Translate a string containing an fts5vocab table type to an
  86. ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
  87. ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
  88. ** and return SQLITE_ERROR.
  89. */
  90. static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
  91. int rc = SQLITE_OK;
  92. char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
  93. if( rc==SQLITE_OK ){
  94. sqlite3Fts5Dequote(zCopy);
  95. if( sqlite3_stricmp(zCopy, "col")==0 ){
  96. *peType = FTS5_VOCAB_COL;
  97. }else
  98. if( sqlite3_stricmp(zCopy, "row")==0 ){
  99. *peType = FTS5_VOCAB_ROW;
  100. }else
  101. if( sqlite3_stricmp(zCopy, "instance")==0 ){
  102. *peType = FTS5_VOCAB_INSTANCE;
  103. }else
  104. {
  105. *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
  106. rc = SQLITE_ERROR;
  107. }
  108. sqlite3_free(zCopy);
  109. }
  110. return rc;
  111. }
  112. /*
  113. ** The xDisconnect() virtual table method.
  114. */
  115. static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
  116. Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
  117. sqlite3_free(pTab);
  118. return SQLITE_OK;
  119. }
  120. /*
  121. ** The xDestroy() virtual table method.
  122. */
  123. static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
  124. Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
  125. sqlite3_free(pTab);
  126. return SQLITE_OK;
  127. }
  128. /*
  129. ** This function is the implementation of both the xConnect and xCreate
  130. ** methods of the FTS3 virtual table.
  131. **
  132. ** The argv[] array contains the following:
  133. **
  134. ** argv[0] -> module name ("fts5vocab")
  135. ** argv[1] -> database name
  136. ** argv[2] -> table name
  137. **
  138. ** then:
  139. **
  140. ** argv[3] -> name of fts5 table
  141. ** argv[4] -> type of fts5vocab table
  142. **
  143. ** or, for tables in the TEMP schema only.
  144. **
  145. ** argv[3] -> name of fts5 tables database
  146. ** argv[4] -> name of fts5 table
  147. ** argv[5] -> type of fts5vocab table
  148. */
  149. static int fts5VocabInitVtab(
  150. sqlite3 *db, /* The SQLite database connection */
  151. void *pAux, /* Pointer to Fts5Global object */
  152. int argc, /* Number of elements in argv array */
  153. const char * const *argv, /* xCreate/xConnect argument array */
  154. sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
  155. char **pzErr /* Write any error message here */
  156. ){
  157. const char *azSchema[] = {
  158. "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")",
  159. "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")",
  160. "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")"
  161. };
  162. Fts5VocabTable *pRet = 0;
  163. int rc = SQLITE_OK; /* Return code */
  164. int bDb;
  165. bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
  166. if( argc!=5 && bDb==0 ){
  167. *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
  168. rc = SQLITE_ERROR;
  169. }else{
  170. int nByte; /* Bytes of space to allocate */
  171. const char *zDb = bDb ? argv[3] : argv[1];
  172. const char *zTab = bDb ? argv[4] : argv[3];
  173. const char *zType = bDb ? argv[5] : argv[4];
  174. int nDb = (int)strlen(zDb)+1;
  175. int nTab = (int)strlen(zTab)+1;
  176. int eType = 0;
  177. rc = fts5VocabTableType(zType, pzErr, &eType);
  178. if( rc==SQLITE_OK ){
  179. assert( eType>=0 && eType<ArraySize(azSchema) );
  180. rc = sqlite3_declare_vtab(db, azSchema[eType]);
  181. }
  182. nByte = sizeof(Fts5VocabTable) + nDb + nTab;
  183. pRet = sqlite3Fts5MallocZero(&rc, nByte);
  184. if( pRet ){
  185. pRet->pGlobal = (Fts5Global*)pAux;
  186. pRet->eType = eType;
  187. pRet->db = db;
  188. pRet->zFts5Tbl = (char*)&pRet[1];
  189. pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
  190. memcpy(pRet->zFts5Tbl, zTab, nTab);
  191. memcpy(pRet->zFts5Db, zDb, nDb);
  192. sqlite3Fts5Dequote(pRet->zFts5Tbl);
  193. sqlite3Fts5Dequote(pRet->zFts5Db);
  194. }
  195. }
  196. *ppVTab = (sqlite3_vtab*)pRet;
  197. return rc;
  198. }
  199. /*
  200. ** The xConnect() and xCreate() methods for the virtual table. All the
  201. ** work is done in function fts5VocabInitVtab().
  202. */
  203. static int fts5VocabConnectMethod(
  204. sqlite3 *db, /* Database connection */
  205. void *pAux, /* Pointer to tokenizer hash table */
  206. int argc, /* Number of elements in argv array */
  207. const char * const *argv, /* xCreate/xConnect argument array */
  208. sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
  209. char **pzErr /* OUT: sqlite3_malloc'd error message */
  210. ){
  211. return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
  212. }
  213. static int fts5VocabCreateMethod(
  214. sqlite3 *db, /* Database connection */
  215. void *pAux, /* Pointer to tokenizer hash table */
  216. int argc, /* Number of elements in argv array */
  217. const char * const *argv, /* xCreate/xConnect argument array */
  218. sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
  219. char **pzErr /* OUT: sqlite3_malloc'd error message */
  220. ){
  221. return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
  222. }
  223. /*
  224. ** Implementation of the xBestIndex method.
  225. **
  226. ** Only constraints of the form:
  227. **
  228. ** term <= ?
  229. ** term == ?
  230. ** term >= ?
  231. **
  232. ** are interpreted. Less-than and less-than-or-equal are treated
  233. ** identically, as are greater-than and greater-than-or-equal.
  234. */
  235. static int fts5VocabBestIndexMethod(
  236. sqlite3_vtab *pUnused,
  237. sqlite3_index_info *pInfo
  238. ){
  239. int i;
  240. int iTermEq = -1;
  241. int iTermGe = -1;
  242. int iTermLe = -1;
  243. int idxNum = (int)pInfo->colUsed;
  244. int nArg = 0;
  245. UNUSED_PARAM(pUnused);
  246. assert( (pInfo->colUsed & FTS5_VOCAB_COLUSED_MASK)==pInfo->colUsed );
  247. for(i=0; i<pInfo->nConstraint; i++){
  248. struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
  249. if( p->usable==0 ) continue;
  250. if( p->iColumn==0 ){ /* term column */
  251. if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i;
  252. if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i;
  253. if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i;
  254. if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i;
  255. if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i;
  256. }
  257. }
  258. if( iTermEq>=0 ){
  259. idxNum |= FTS5_VOCAB_TERM_EQ;
  260. pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg;
  261. pInfo->estimatedCost = 100;
  262. }else{
  263. pInfo->estimatedCost = 1000000;
  264. if( iTermGe>=0 ){
  265. idxNum |= FTS5_VOCAB_TERM_GE;
  266. pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg;
  267. pInfo->estimatedCost = pInfo->estimatedCost / 2;
  268. }
  269. if( iTermLe>=0 ){
  270. idxNum |= FTS5_VOCAB_TERM_LE;
  271. pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg;
  272. pInfo->estimatedCost = pInfo->estimatedCost / 2;
  273. }
  274. }
  275. /* This virtual table always delivers results in ascending order of
  276. ** the "term" column (column 0). So if the user has requested this
  277. ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the
  278. ** sqlite3_index_info.orderByConsumed flag to tell the core the results
  279. ** are already in sorted order. */
  280. if( pInfo->nOrderBy==1
  281. && pInfo->aOrderBy[0].iColumn==0
  282. && pInfo->aOrderBy[0].desc==0
  283. ){
  284. pInfo->orderByConsumed = 1;
  285. }
  286. pInfo->idxNum = idxNum;
  287. return SQLITE_OK;
  288. }
  289. /*
  290. ** Implementation of xOpen method.
  291. */
  292. static int fts5VocabOpenMethod(
  293. sqlite3_vtab *pVTab,
  294. sqlite3_vtab_cursor **ppCsr
  295. ){
  296. Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
  297. Fts5Table *pFts5 = 0;
  298. Fts5VocabCursor *pCsr = 0;
  299. int rc = SQLITE_OK;
  300. sqlite3_stmt *pStmt = 0;
  301. char *zSql = 0;
  302. if( pTab->bBusy ){
  303. pVTab->zErrMsg = sqlite3_mprintf(
  304. "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
  305. );
  306. return SQLITE_ERROR;
  307. }
  308. zSql = sqlite3Fts5Mprintf(&rc,
  309. "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
  310. pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
  311. );
  312. if( zSql ){
  313. rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
  314. }
  315. sqlite3_free(zSql);
  316. assert( rc==SQLITE_OK || pStmt==0 );
  317. if( rc==SQLITE_ERROR ) rc = SQLITE_OK;
  318. pTab->bBusy = 1;
  319. if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
  320. i64 iId = sqlite3_column_int64(pStmt, 0);
  321. pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId);
  322. }
  323. pTab->bBusy = 0;
  324. if( rc==SQLITE_OK ){
  325. if( pFts5==0 ){
  326. rc = sqlite3_finalize(pStmt);
  327. pStmt = 0;
  328. if( rc==SQLITE_OK ){
  329. pVTab->zErrMsg = sqlite3_mprintf(
  330. "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
  331. );
  332. rc = SQLITE_ERROR;
  333. }
  334. }else{
  335. rc = sqlite3Fts5FlushToDisk(pFts5);
  336. }
  337. }
  338. if( rc==SQLITE_OK ){
  339. i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor);
  340. pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
  341. }
  342. if( pCsr ){
  343. pCsr->pFts5 = pFts5;
  344. pCsr->pStmt = pStmt;
  345. pCsr->aCnt = (i64*)&pCsr[1];
  346. pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol];
  347. }else{
  348. sqlite3_finalize(pStmt);
  349. }
  350. *ppCsr = (sqlite3_vtab_cursor*)pCsr;
  351. return rc;
  352. }
  353. static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
  354. pCsr->rowid = 0;
  355. sqlite3Fts5IterClose(pCsr->pIter);
  356. sqlite3Fts5StructureRelease(pCsr->pStruct);
  357. pCsr->pStruct = 0;
  358. pCsr->pIter = 0;
  359. sqlite3_free(pCsr->zLeTerm);
  360. pCsr->nLeTerm = -1;
  361. pCsr->zLeTerm = 0;
  362. pCsr->bEof = 0;
  363. }
  364. /*
  365. ** Close the cursor. For additional information see the documentation
  366. ** on the xClose method of the virtual table interface.
  367. */
  368. static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
  369. Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  370. fts5VocabResetCursor(pCsr);
  371. sqlite3Fts5BufferFree(&pCsr->term);
  372. sqlite3_finalize(pCsr->pStmt);
  373. sqlite3_free(pCsr);
  374. return SQLITE_OK;
  375. }
  376. static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){
  377. int rc = SQLITE_OK;
  378. if( sqlite3Fts5IterEof(pCsr->pIter) ){
  379. pCsr->bEof = 1;
  380. }else{
  381. const char *zTerm;
  382. int nTerm;
  383. zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
  384. if( pCsr->nLeTerm>=0 ){
  385. int nCmp = MIN(nTerm, pCsr->nLeTerm);
  386. int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
  387. if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
  388. pCsr->bEof = 1;
  389. }
  390. }
  391. sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
  392. }
  393. return rc;
  394. }
  395. static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){
  396. int eDetail = pCsr->pFts5->pConfig->eDetail;
  397. int rc = SQLITE_OK;
  398. Fts5IndexIter *pIter = pCsr->pIter;
  399. i64 *pp = &pCsr->iInstPos;
  400. int *po = &pCsr->iInstOff;
  401. assert( sqlite3Fts5IterEof(pIter)==0 );
  402. assert( pCsr->bEof==0 );
  403. while( eDetail==FTS5_DETAIL_NONE
  404. || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp)
  405. ){
  406. pCsr->iInstPos = 0;
  407. pCsr->iInstOff = 0;
  408. rc = sqlite3Fts5IterNextScan(pCsr->pIter);
  409. if( rc==SQLITE_OK ){
  410. rc = fts5VocabInstanceNewTerm(pCsr);
  411. if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE ) break;
  412. }
  413. if( rc ){
  414. pCsr->bEof = 1;
  415. break;
  416. }
  417. }
  418. return rc;
  419. }
  420. /*
  421. ** Advance the cursor to the next row in the table.
  422. */
  423. static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
  424. Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  425. Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
  426. int nCol = pCsr->pFts5->pConfig->nCol;
  427. int rc;
  428. rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct);
  429. if( rc!=SQLITE_OK ) return rc;
  430. pCsr->rowid++;
  431. if( pTab->eType==FTS5_VOCAB_INSTANCE ){
  432. return fts5VocabInstanceNext(pCsr);
  433. }
  434. if( pTab->eType==FTS5_VOCAB_COL ){
  435. for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
  436. if( pCsr->aDoc[pCsr->iCol] ) break;
  437. }
  438. }
  439. if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){
  440. if( sqlite3Fts5IterEof(pCsr->pIter) ){
  441. pCsr->bEof = 1;
  442. }else{
  443. const char *zTerm;
  444. int nTerm;
  445. zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
  446. assert( nTerm>=0 );
  447. if( pCsr->nLeTerm>=0 ){
  448. int nCmp = MIN(nTerm, pCsr->nLeTerm);
  449. int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
  450. if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
  451. pCsr->bEof = 1;
  452. return SQLITE_OK;
  453. }
  454. }
  455. sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
  456. memset(pCsr->aCnt, 0, nCol * sizeof(i64));
  457. memset(pCsr->aDoc, 0, nCol * sizeof(i64));
  458. pCsr->iCol = 0;
  459. assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
  460. while( rc==SQLITE_OK ){
  461. int eDetail = pCsr->pFts5->pConfig->eDetail;
  462. const u8 *pPos; int nPos; /* Position list */
  463. i64 iPos = 0; /* 64-bit position read from poslist */
  464. int iOff = 0; /* Current offset within position list */
  465. pPos = pCsr->pIter->pData;
  466. nPos = pCsr->pIter->nData;
  467. switch( pTab->eType ){
  468. case FTS5_VOCAB_ROW:
  469. /* Do not bother counting the number of instances if the "cnt"
  470. ** column is not being read (according to colUsed). */
  471. if( eDetail==FTS5_DETAIL_FULL && (pCsr->colUsed & 0x04) ){
  472. while( iPos<nPos ){
  473. u32 ii;
  474. fts5FastGetVarint32(pPos, iPos, ii);
  475. if( ii==1 ){
  476. /* New column in the position list */
  477. fts5FastGetVarint32(pPos, iPos, ii);
  478. }else{
  479. /* An instance - increment pCsr->aCnt[] */
  480. pCsr->aCnt[0]++;
  481. }
  482. }
  483. }
  484. pCsr->aDoc[0]++;
  485. break;
  486. case FTS5_VOCAB_COL:
  487. if( eDetail==FTS5_DETAIL_FULL ){
  488. int iCol = -1;
  489. while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
  490. int ii = FTS5_POS2COLUMN(iPos);
  491. if( iCol!=ii ){
  492. if( ii>=nCol ){
  493. rc = FTS5_CORRUPT;
  494. break;
  495. }
  496. pCsr->aDoc[ii]++;
  497. iCol = ii;
  498. }
  499. pCsr->aCnt[ii]++;
  500. }
  501. }else if( eDetail==FTS5_DETAIL_COLUMNS ){
  502. while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
  503. assert_nc( iPos>=0 && iPos<nCol );
  504. if( iPos>=nCol ){
  505. rc = FTS5_CORRUPT;
  506. break;
  507. }
  508. pCsr->aDoc[iPos]++;
  509. }
  510. }else{
  511. assert( eDetail==FTS5_DETAIL_NONE );
  512. pCsr->aDoc[0]++;
  513. }
  514. break;
  515. default:
  516. assert( pTab->eType==FTS5_VOCAB_INSTANCE );
  517. break;
  518. }
  519. if( rc==SQLITE_OK ){
  520. rc = sqlite3Fts5IterNextScan(pCsr->pIter);
  521. }
  522. if( pTab->eType==FTS5_VOCAB_INSTANCE ) break;
  523. if( rc==SQLITE_OK ){
  524. zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
  525. if( nTerm!=pCsr->term.n
  526. || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm))
  527. ){
  528. break;
  529. }
  530. if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
  531. }
  532. }
  533. }
  534. }
  535. if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
  536. for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++);
  537. if( pCsr->iCol==nCol ){
  538. rc = FTS5_CORRUPT;
  539. }
  540. }
  541. return rc;
  542. }
  543. /*
  544. ** This is the xFilter implementation for the virtual table.
  545. */
  546. static int fts5VocabFilterMethod(
  547. sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
  548. int idxNum, /* Strategy index */
  549. const char *zUnused, /* Unused */
  550. int nUnused, /* Number of elements in apVal */
  551. sqlite3_value **apVal /* Arguments for the indexing scheme */
  552. ){
  553. Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
  554. Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  555. int eType = pTab->eType;
  556. int rc = SQLITE_OK;
  557. int iVal = 0;
  558. int f = FTS5INDEX_QUERY_SCAN;
  559. const char *zTerm = 0;
  560. int nTerm = 0;
  561. sqlite3_value *pEq = 0;
  562. sqlite3_value *pGe = 0;
  563. sqlite3_value *pLe = 0;
  564. UNUSED_PARAM2(zUnused, nUnused);
  565. fts5VocabResetCursor(pCsr);
  566. if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
  567. if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
  568. if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
  569. pCsr->colUsed = (idxNum & FTS5_VOCAB_COLUSED_MASK);
  570. if( pEq ){
  571. zTerm = (const char *)sqlite3_value_text(pEq);
  572. nTerm = sqlite3_value_bytes(pEq);
  573. f = FTS5INDEX_QUERY_NOTOKENDATA;
  574. }else{
  575. if( pGe ){
  576. zTerm = (const char *)sqlite3_value_text(pGe);
  577. nTerm = sqlite3_value_bytes(pGe);
  578. }
  579. if( pLe ){
  580. const char *zCopy = (const char *)sqlite3_value_text(pLe);
  581. if( zCopy==0 ) zCopy = "";
  582. pCsr->nLeTerm = sqlite3_value_bytes(pLe);
  583. pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1);
  584. if( pCsr->zLeTerm==0 ){
  585. rc = SQLITE_NOMEM;
  586. }else{
  587. memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1);
  588. }
  589. }
  590. }
  591. if( rc==SQLITE_OK ){
  592. Fts5Index *pIndex = pCsr->pFts5->pIndex;
  593. rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
  594. if( rc==SQLITE_OK ){
  595. pCsr->pStruct = sqlite3Fts5StructureRef(pIndex);
  596. }
  597. }
  598. if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){
  599. rc = fts5VocabInstanceNewTerm(pCsr);
  600. }
  601. if( rc==SQLITE_OK && !pCsr->bEof
  602. && (eType!=FTS5_VOCAB_INSTANCE
  603. || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE)
  604. ){
  605. rc = fts5VocabNextMethod(pCursor);
  606. }
  607. return rc;
  608. }
  609. /*
  610. ** This is the xEof method of the virtual table. SQLite calls this
  611. ** routine to find out if it has reached the end of a result set.
  612. */
  613. static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
  614. Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  615. return pCsr->bEof;
  616. }
  617. static int fts5VocabColumnMethod(
  618. sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
  619. sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
  620. int iCol /* Index of column to read value from */
  621. ){
  622. Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  623. int eDetail = pCsr->pFts5->pConfig->eDetail;
  624. int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType;
  625. i64 iVal = 0;
  626. if( iCol==0 ){
  627. sqlite3_result_text(
  628. pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
  629. );
  630. }else if( eType==FTS5_VOCAB_COL ){
  631. assert( iCol==1 || iCol==2 || iCol==3 );
  632. if( iCol==1 ){
  633. if( eDetail!=FTS5_DETAIL_NONE ){
  634. const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol];
  635. sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
  636. }
  637. }else if( iCol==2 ){
  638. iVal = pCsr->aDoc[pCsr->iCol];
  639. }else{
  640. iVal = pCsr->aCnt[pCsr->iCol];
  641. }
  642. }else if( eType==FTS5_VOCAB_ROW ){
  643. assert( iCol==1 || iCol==2 );
  644. if( iCol==1 ){
  645. iVal = pCsr->aDoc[0];
  646. }else{
  647. iVal = pCsr->aCnt[0];
  648. }
  649. }else{
  650. assert( eType==FTS5_VOCAB_INSTANCE );
  651. switch( iCol ){
  652. case 1:
  653. sqlite3_result_int64(pCtx, pCsr->pIter->iRowid);
  654. break;
  655. case 2: {
  656. int ii = -1;
  657. if( eDetail==FTS5_DETAIL_FULL ){
  658. ii = FTS5_POS2COLUMN(pCsr->iInstPos);
  659. }else if( eDetail==FTS5_DETAIL_COLUMNS ){
  660. ii = (int)pCsr->iInstPos;
  661. }
  662. if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){
  663. const char *z = pCsr->pFts5->pConfig->azCol[ii];
  664. sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
  665. }
  666. break;
  667. }
  668. default: {
  669. assert( iCol==3 );
  670. if( eDetail==FTS5_DETAIL_FULL ){
  671. int ii = FTS5_POS2OFFSET(pCsr->iInstPos);
  672. sqlite3_result_int(pCtx, ii);
  673. }
  674. break;
  675. }
  676. }
  677. }
  678. if( iVal>0 ) sqlite3_result_int64(pCtx, iVal);
  679. return SQLITE_OK;
  680. }
  681. /*
  682. ** This is the xRowid method. The SQLite core calls this routine to
  683. ** retrieve the rowid for the current row of the result set. The
  684. ** rowid should be written to *pRowid.
  685. */
  686. static int fts5VocabRowidMethod(
  687. sqlite3_vtab_cursor *pCursor,
  688. sqlite_int64 *pRowid
  689. ){
  690. Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  691. *pRowid = pCsr->rowid;
  692. return SQLITE_OK;
  693. }
  694. int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
  695. static const sqlite3_module fts5Vocab = {
  696. /* iVersion */ 2,
  697. /* xCreate */ fts5VocabCreateMethod,
  698. /* xConnect */ fts5VocabConnectMethod,
  699. /* xBestIndex */ fts5VocabBestIndexMethod,
  700. /* xDisconnect */ fts5VocabDisconnectMethod,
  701. /* xDestroy */ fts5VocabDestroyMethod,
  702. /* xOpen */ fts5VocabOpenMethod,
  703. /* xClose */ fts5VocabCloseMethod,
  704. /* xFilter */ fts5VocabFilterMethod,
  705. /* xNext */ fts5VocabNextMethod,
  706. /* xEof */ fts5VocabEofMethod,
  707. /* xColumn */ fts5VocabColumnMethod,
  708. /* xRowid */ fts5VocabRowidMethod,
  709. /* xUpdate */ 0,
  710. /* xBegin */ 0,
  711. /* xSync */ 0,
  712. /* xCommit */ 0,
  713. /* xRollback */ 0,
  714. /* xFindFunction */ 0,
  715. /* xRename */ 0,
  716. /* xSavepoint */ 0,
  717. /* xRelease */ 0,
  718. /* xRollbackTo */ 0,
  719. /* xShadowName */ 0,
  720. /* xIntegrity */ 0
  721. };
  722. void *p = (void*)pGlobal;
  723. return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
  724. }