[BACK]Return to apropos-utils.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / usr.sbin / makemandb

Annotation of src/usr.sbin/makemandb/apropos-utils.c, Revision 1.2

1.2     ! joerg       1: /*     $NetBSD$        */
1.1       joerg       2: /*-
                      3:  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
                      4:  * All rights reserved.
                      5:  *
                      6:  * This code was developed as part of Google's Summer of Code 2011 program.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  *
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in
                     16:  *    the documentation and/or other materials provided with the
                     17:  *    distribution.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
                     20:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
                     21:  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
                     22:  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
                     23:  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
                     24:  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
                     25:  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
                     26:  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
                     27:  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
                     28:  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
                     29:  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     30:  * SUCH DAMAGE.
                     31:  */
                     32:
                     33: #include <sys/cdefs.h>
1.2     ! joerg      34: __RCSID("$NetBSD: apropos-utils.c,v 1.1 2012/02/07 19:13:32 joerg Exp $");
1.1       joerg      35:
                     36: #include <sys/stat.h>
                     37:
                     38: #include <assert.h>
                     39: #include <ctype.h>
                     40: #include <err.h>
                     41: #include <math.h>
                     42: #include <stdio.h>
                     43: #include <stdlib.h>
                     44: #include <string.h>
                     45: #include <util.h>
                     46: #include <zlib.h>
                     47:
                     48: #include "apropos-utils.h"
                     49: #include "mandoc.h"
                     50: #include "sqlite3.h"
                     51:
                     52: typedef struct orig_callback_data {
                     53:        void *data;
                     54:        int (*callback) (void *, const char *, const char *, const char *,
                     55:                const char *, size_t);
                     56: } orig_callback_data;
                     57:
                     58: typedef struct inverse_document_frequency {
                     59:        double value;
                     60:        int status;
                     61: } inverse_document_frequency;
                     62:
                     63: /* weights for individual columns */
                     64: static const double col_weights[] = {
                     65:        2.0,    // NAME
                     66:        2.00,   // Name-description
                     67:        0.55,   // DESCRIPTION
                     68:        0.10,   // LIBRARY
                     69:        0.001,  //RETURN VALUES
                     70:        0.20,   //ENVIRONMENT
                     71:        0.01,   //FILES
                     72:        0.001,  //EXIT STATUS
                     73:        2.00,   //DIAGNOSTICS
                     74:        0.05,   //ERRORS
                     75:        0.00,   //md5_hash
                     76:        1.00    //machine
                     77: };
                     78:
                     79: /*
                     80:  * lower --
                     81:  *  Converts the string str to lower case
                     82:  */
                     83: char *
                     84: lower(char *str)
                     85: {
                     86:        assert(str);
                     87:        int i = 0;
                     88:        char c;
                     89:        while (str[i] != '\0') {
                     90:                c = tolower((unsigned char) str[i]);
                     91:                str[i++] = c;
                     92:        }
                     93:        return str;
                     94: }
                     95:
                     96: /*
                     97: * concat--
                     98: *  Utility function. Concatenates together: dst, a space character and src.
                     99: * dst + " " + src
                    100: */
                    101: void
                    102: concat(char **dst, const char *src)
                    103: {
                    104:        concat2(dst, src, strlen(src));
                    105: }
                    106:
                    107: void
                    108: concat2(char **dst, const char *src, size_t srclen)
                    109: {
                    110:        size_t total_len, dst_len;
                    111:        assert(src != NULL);
                    112:
                    113:        /* If destination buffer dst is NULL, then simply strdup the source buffer */
                    114:        if (*dst == NULL) {
                    115:                *dst = estrdup(src);
                    116:                return;
                    117:        }
                    118:
                    119:        dst_len = strlen(*dst);
                    120:        /*
                    121:         * NUL Byte and separator space
                    122:         */
                    123:        total_len = dst_len + srclen + 2;
                    124:
                    125:        *dst = erealloc(*dst, total_len);
                    126:
                    127:        /* Append a space at the end of dst */
                    128:        (*dst)[dst_len++] = ' ';
                    129:
                    130:        /* Now, copy src at the end of dst */
                    131:        memcpy(*dst + dst_len, src, srclen + 1);
                    132: }
                    133:
                    134: void
                    135: close_db(sqlite3 *db)
                    136: {
                    137:        sqlite3_close(db);
                    138:        sqlite3_shutdown();
                    139: }
                    140:
                    141: /*
                    142:  * create_db --
                    143:  *  Creates the database schema.
                    144:  */
                    145: static int
                    146: create_db(sqlite3 *db)
                    147: {
                    148:        const char *sqlstr = NULL;
                    149:        char *schemasql;
                    150:        char *errmsg = NULL;
                    151:
                    152: /*------------------------ Create the tables------------------------------*/
                    153:
                    154: #if NOTYET
                    155:        sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL);
                    156: #else
                    157:        sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL);
                    158: #endif
                    159:
                    160:        schemasql = sqlite3_mprintf("PRAGMA user_version = %d",
                    161:            APROPOS_SCHEMA_VERSION);
                    162:        sqlite3_exec(db, schemasql, NULL, NULL, &errmsg);
                    163:        if (errmsg != NULL)
                    164:                goto out;
                    165:        sqlite3_free(schemasql);
                    166:
                    167:        sqlstr = "CREATE VIRTUAL TABLE mandb USING fts4(section, name, "
                    168:                            "name_desc, desc, lib, return_vals, env, files, "
                    169:                            "exit_status, diagnostics, errors, md5_hash UNIQUE, machine, "
                    170:                            "compress=zip, uncompress=unzip, tokenize=porter); "        //mandb
                    171:                        "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, "
                    172:                            "file UNIQUE, md5_hash UNIQUE, id  INTEGER PRIMARY KEY); "
                    173:                                //mandb_meta
                    174:                        "CREATE TABLE IF NOT EXISTS mandb_links(link, target, section, "
                    175:                            "machine); ";       //mandb_links
                    176:
                    177:        sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
                    178:        if (errmsg != NULL)
                    179:                goto out;
                    180:
                    181:        sqlstr = "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links "
                    182:                        "(link); "
                    183:                        "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta "
                    184:                        "(device, inode)";
                    185:        sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
                    186:        if (errmsg != NULL)
                    187:                goto out;
                    188:        return 0;
                    189:
                    190: out:
                    191:        warnx("%s", errmsg);
                    192:        free(errmsg);
                    193:        sqlite3_close(db);
                    194:        sqlite3_shutdown();
                    195:        return -1;
                    196: }
                    197:
                    198: /*
                    199:  * zip --
                    200:  *  User defined Sqlite function to compress the FTS table
                    201:  */
                    202: static void
                    203: zip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
                    204: {
                    205:        int nin;
                    206:        long int nout;
                    207:        const unsigned char * inbuf;
                    208:        unsigned char *outbuf;
                    209:
                    210:        assert(nval == 1);
                    211:        nin = sqlite3_value_bytes(apval[0]);
                    212:        inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]);
                    213:        nout = nin + 13 + (nin + 999) / 1000;
                    214:        outbuf = emalloc(nout);
                    215:        compress(outbuf, (unsigned long *) &nout, inbuf, nin);
                    216:        sqlite3_result_blob(pctx, outbuf, nout, free);
                    217: }
                    218:
                    219: /*
                    220:  * unzip --
                    221:  *  User defined Sqlite function to uncompress the FTS table.
                    222:  */
                    223: static void
                    224: unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
                    225: {
                    226:        unsigned int rc;
                    227:        unsigned char *outbuf;
                    228:        z_stream stream;
                    229:
                    230:        assert(nval == 1);
                    231:        stream.next_in = __UNCONST(sqlite3_value_blob(apval[0]));
                    232:        stream.avail_in = sqlite3_value_bytes(apval[0]);
                    233:        stream.avail_out = stream.avail_in * 2 + 100;
                    234:        stream.next_out = outbuf = emalloc(stream.avail_out);
                    235:        stream.zalloc = NULL;
                    236:        stream.zfree = NULL;
                    237:
                    238:        if (inflateInit(&stream) != Z_OK) {
                    239:                free(outbuf);
                    240:                return;
                    241:        }
                    242:
                    243:        while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) {
                    244:                if (rc != Z_OK ||
                    245:                    (stream.avail_out != 0 && stream.avail_in == 0)) {
                    246:                        free(outbuf);
                    247:                        return;
                    248:                }
                    249:                outbuf = erealloc(outbuf, stream.total_out * 2);
                    250:                stream.next_out = outbuf + stream.total_out;
                    251:                stream.avail_out = stream.total_out;
                    252:        }
                    253:        if (inflateEnd(&stream) != Z_OK) {
                    254:                free(outbuf);
                    255:                return;
                    256:        }
                    257:        outbuf = erealloc(outbuf, stream.total_out);
                    258:        sqlite3_result_text(pctx, (const char *) outbuf, stream.total_out, free);
                    259: }
                    260:
                    261: /* init_db --
                    262:  *   Prepare the database. Register the compress/uncompress functions and the
                    263:  *   stopword tokenizer.
                    264:  *      db_flag specifies the mode in which to open the database. 3 options are
                    265:  *   available:
                    266:  *     1. DB_READONLY: Open in READONLY mode. An error if db does not exist.
                    267:  *     2. DB_READWRITE: Open in read-write mode. An error if db does not exist.
                    268:  *     3. DB_CREATE: Open in read-write mode. It will try to create the db if
                    269:  *                     it does not exist already.
                    270:  *  RETURN VALUES:
                    271:  *             The function will return NULL in case the db does not exist and DB_CREATE
                    272:  *     was not specified. And in case DB_CREATE was specified and yet NULL is
                    273:  *     returned, then there was some other error.
                    274:  *     In normal cases the function should return a handle to the db.
                    275:  */
                    276: sqlite3 *
                    277: init_db(int db_flag)
                    278: {
                    279:        sqlite3 *db = NULL;
                    280:        sqlite3_stmt *stmt;
                    281:        struct stat sb;
                    282:        int rc;
                    283:        int create_db_flag = 0;
                    284:
                    285:        /* Check if the database exists or not */
                    286:        if (!(stat(DBPATH, &sb) == 0 && S_ISREG(sb.st_mode))) {
                    287:                /* Database does not exist, check if DB_CREATE was specified, and set
                    288:                 * flag to create the database schema
                    289:                 */
                    290:                if (db_flag != (MANDB_CREATE)) {
                    291:                        warnx("Missing apropos database. "
                    292:                              "Please run makemandb to create it.");
                    293:                        return NULL;
                    294:                }
                    295:                create_db_flag = 1;
                    296:        }
                    297:
                    298:        /* Now initialize the database connection */
                    299:        sqlite3_initialize();
                    300:        rc = sqlite3_open_v2(DBPATH, &db, db_flag, NULL);
                    301:
                    302:        if (rc != SQLITE_OK) {
                    303:                warnx("%s", sqlite3_errmsg(db));
                    304:                sqlite3_shutdown();
                    305:                return NULL;
                    306:        }
                    307:
                    308:        if (create_db_flag && create_db(db) < 0) {
                    309:                warnx("%s", "Unable to create database schema");
                    310:                goto error;
                    311:        }
                    312:
                    313:        rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL);
                    314:        if (rc != SQLITE_OK) {
                    315:                warnx("Unable to query schema version");
                    316:                goto error;
                    317:        }
                    318:        if (sqlite3_step(stmt) != SQLITE_ROW) {
                    319:                sqlite3_finalize(stmt);
                    320:                warnx("Unable to query schema version");
                    321:                goto error;
                    322:        }
                    323:        if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) {
                    324:                sqlite3_finalize(stmt);
                    325:                warnx("Incorrect schema version found. "
                    326:                      "Please run makemandb -f.");
                    327:                goto error;
                    328:        }
                    329:        sqlite3_finalize(stmt);
                    330:
                    331:        sqlite3_extended_result_codes(db, 1);
                    332:
                    333:        /* Register the zip and unzip functions for FTS compression */
                    334:        rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip, NULL, NULL);
                    335:        if (rc != SQLITE_OK) {
                    336:                warnx("Unable to register function: compress");
                    337:                goto error;
                    338:        }
                    339:
                    340:        rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL,
                    341:                                  unzip, NULL, NULL);
                    342:        if (rc != SQLITE_OK) {
                    343:                warnx("Unable to register function: uncompress");
                    344:                goto error;
                    345:        }
                    346:        return db;
                    347: error:
                    348:        sqlite3_close(db);
                    349:        sqlite3_shutdown();
                    350:        return NULL;
                    351: }
                    352:
                    353: /*
                    354:  * rank_func --
                    355:  *  Sqlite user defined function for ranking the documents.
                    356:  *  For each phrase of the query, it computes the tf and idf and adds them over.
                    357:  *  It computes the final rank, by multiplying tf and idf together.
                    358:  *  Weight of term t for document d = (term frequency of t in d *
                    359:  *                                      inverse document frequency of t)
                    360:  *
                    361:  *  Term Frequency of term t in document d = Number of times t occurs in d /
                    362:  *                                             Number of times t appears in all
                    363:  *                                                                                     documents
                    364:  *
                    365:  *  Inverse document frequency of t = log(Total number of documents /
                    366:  *                                                                             Number of documents in which t occurs)
                    367:  */
                    368: static void
                    369: rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval)
                    370: {
                    371:        inverse_document_frequency *idf = sqlite3_user_data(pctx);
                    372:        double tf = 0.0;
                    373:        const unsigned int *matchinfo;
                    374:        int ncol;
                    375:        int nphrase;
                    376:        int iphrase;
                    377:        int ndoc;
                    378:        int doclen = 0;
                    379:        const double k = 3.75;
                    380:        /* Check that the number of arguments passed to this function is correct. */
                    381:        assert(nval == 1);
                    382:
                    383:        matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]);
                    384:        nphrase = matchinfo[0];
                    385:        ncol = matchinfo[1];
                    386:        ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol];
                    387:        for (iphrase = 0; iphrase < nphrase; iphrase++) {
                    388:                int icol;
                    389:                const unsigned int *phraseinfo = &matchinfo[2 + ncol+ iphrase * ncol * 3];
                    390:                for(icol = 1; icol < ncol; icol++) {
                    391:
                    392:                        /* nhitcount: number of times the current phrase occurs in the current
                    393:                         *            column in the current document.
                    394:                         * nglobalhitcount: number of times current phrase occurs in the current
                    395:                         *                  column in all documents.
                    396:                         * ndocshitcount:   number of documents in which the current phrase
                    397:                         *                  occurs in the current column at least once.
                    398:                         */
                    399:                        int nhitcount = phraseinfo[3 * icol];
                    400:                        int nglobalhitcount = phraseinfo[3 * icol + 1];
                    401:                        int ndocshitcount = phraseinfo[3 * icol + 2];
                    402:                        doclen = matchinfo[2 + icol ];
                    403:                        double weight = col_weights[icol - 1];
                    404:                        if (idf->status == 0 && ndocshitcount)
                    405:                                idf->value += log(((double)ndoc / ndocshitcount))* weight;
                    406:
                    407:                        /* Dividing the tf by document length to normalize the effect of
                    408:                         * longer documents.
                    409:                         */
                    410:                        if (nglobalhitcount > 0 && nhitcount)
                    411:                                tf += (((double)nhitcount  * weight) / (nglobalhitcount * doclen));
                    412:                }
                    413:        }
                    414:        idf->status = 1;
                    415:
                    416:        /* Final score = (tf * idf)/ ( k + tf)
                    417:         *      Dividing by k+ tf further normalizes the weight leading to better
                    418:         *  results.
                    419:         *  The value of k is experimental
                    420:         */
                    421:        double score = (tf * idf->value/ ( k + tf)) ;
                    422:        sqlite3_result_double(pctx, score);
                    423:        return;
                    424: }
                    425:
                    426: /*
                    427:  *  run_query --
                    428:  *  Performs the searches for the keywords entered by the user.
                    429:  *  The 2nd param: snippet_args is an array of strings providing values for the
                    430:  *  last three parameters to the snippet function of sqlite. (Look at the docs).
                    431:  *  The 3rd param: args contains rest of the search parameters. Look at
                    432:  *  apropos-utils.h for the description of individual fields.
                    433:  *
                    434:  */
                    435: int
                    436: run_query(sqlite3 *db, const char *snippet_args[3], query_args *args)
                    437: {
                    438:        const char *default_snippet_args[3];
                    439:        char *section_clause = NULL;
                    440:        char *limit_clause = NULL;
                    441:        char *machine_clause = NULL;
                    442:        char *query;
                    443:        const char *section;
                    444:        char *name;
                    445:        const char *name_desc;
                    446:        const char *machine;
                    447:        const char *snippet;
                    448:        char *m = NULL;
                    449:        int rc;
                    450:        inverse_document_frequency idf = {0, 0};
                    451:        sqlite3_stmt *stmt;
                    452:
                    453:        if (args->machine)
                    454:                easprintf(&machine_clause, "AND machine = \'%s\' ", args->machine);
                    455:
                    456:        /* Register the rank function */
                    457:        rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY, (void *)&idf,
                    458:                                     rank_func, NULL, NULL);
                    459:        if (rc != SQLITE_OK) {
                    460:                sqlite3_close(db);
                    461:                sqlite3_shutdown();
                    462:                errx(EXIT_FAILURE, "Unable to register the ranking function");
                    463:        }
                    464:
                    465:        /* We want to build a query of the form: "select x,y,z from mandb where
                    466:         * mandb match :query [AND (section LIKE '1' OR section LIKE '2' OR...)]
                    467:         * ORDER BY rank DESC..."
                    468:         * NOTES: 1. The portion in square brackets is optional, it will be there
                    469:         * only if the user has specified an option on the command line to search in
                    470:         * one or more specific sections.
                    471:         * 2. I am using LIKE operator because '=' or IN operators do not seem to be
                    472:         * working with the compression option enabled.
                    473:         */
                    474:
                    475:        if (args->sec_nums) {
                    476:                char *temp;
                    477:                int i;
                    478:
                    479:                for (i = 0; i < SECMAX; i++) {
                    480:                        if (args->sec_nums[i] == 0)
                    481:                                continue;
                    482:                        easprintf(&temp, " OR section = \'%d\'", i + 1);
                    483:                        if (section_clause) {
                    484:                                concat(&section_clause, temp);
                    485:                                free(temp);
                    486:                        } else {
                    487:                                section_clause = temp;
                    488:                        }
                    489:                }
                    490:                if (section_clause) {
                    491:                        /*
                    492:                         * At least one section requested, add glue for query.
                    493:                         */
                    494:                        temp = section_clause;
                    495:                        /* Skip " OR " before first term. */
                    496:                        easprintf(&section_clause, " AND (%s)", temp + 4);
                    497:                        free(temp);
                    498:                }
                    499:        }
                    500:        if (args->nrec >= 0) {
                    501:                /* Use the provided number of records and offset */
                    502:                easprintf(&limit_clause, " LIMIT %d OFFSET %d",
                    503:                    args->nrec, args->offset);
                    504:        }
                    505:
                    506:        if (snippet_args == NULL) {
                    507:                default_snippet_args[0] = "";
                    508:                default_snippet_args[1] = "";
                    509:                default_snippet_args[2] = "...";
                    510:                snippet_args = default_snippet_args;
                    511:        }
                    512:        query = sqlite3_mprintf("SELECT section, name, name_desc, machine,"
                    513:            " snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
                    514:            " rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
                    515:            " FROM mandb"
                    516:            " WHERE mandb MATCH %Q %s "
                    517:            "%s"
                    518:            " ORDER BY rank DESC"
                    519:            "%s",
                    520:            snippet_args[0], snippet_args[1], snippet_args[2], args->search_str,
                    521:            machine_clause ? machine_clause : "",
                    522:            section_clause ? section_clause : "",
                    523:            limit_clause ? limit_clause : "");
                    524:
                    525:        free(machine_clause);
                    526:        free(section_clause);
                    527:        free(limit_clause);
                    528:
                    529:        if (query == NULL) {
                    530:                *args->errmsg = estrdup("malloc failed");
                    531:                return -1;
                    532:        }
                    533:        rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
                    534:        if (rc == SQLITE_IOERR) {
                    535:                warnx("Corrupt database. Please rerun makemandb");
                    536:                sqlite3_free(query);
                    537:                return -1;
                    538:        } else if (rc != SQLITE_OK) {
                    539:                warnx("%s", sqlite3_errmsg(db));
                    540:                sqlite3_free(query);
                    541:                return -1;
                    542:        }
                    543:
                    544:        while (sqlite3_step(stmt) == SQLITE_ROW) {
                    545:                section = (const char *) sqlite3_column_text(stmt, 0);
                    546:                name_desc = (const char *) sqlite3_column_text(stmt, 2);
                    547:                machine = (const char *) sqlite3_column_text(stmt, 3);
                    548:                snippet = (const char *) sqlite3_column_text(stmt, 4);
                    549:                if (machine && machine[0]) {
                    550:                        m = estrdup(machine);
                    551:                        easprintf(&name, "%s/%s", lower(m),
                    552:                                sqlite3_column_text(stmt, 1));
                    553:                        free(m);
                    554:                } else {
                    555:                        name = estrdup((const char *) sqlite3_column_text(stmt, 1));
                    556:                }
                    557:
                    558:                (args->callback)(args->callback_data, section, name, name_desc, snippet,
                    559:                        strlen(snippet));
                    560:
                    561:                free(name);
                    562:        }
                    563:
                    564:        sqlite3_finalize(stmt);
                    565:        sqlite3_free(query);
                    566:        return *(args->errmsg) == NULL ? 0 : -1;
                    567: }
                    568:
                    569: /*
                    570:  * callback_html --
                    571:  *  Callback function for run_query_html. It builds the html output and then
                    572:  *  calls the actual user supplied callback function.
                    573:  */
                    574: static int
                    575: callback_html(void *data, const char *section, const char *name,
                    576:        const char *name_desc, const char *snippet, size_t snippet_length)
                    577: {
                    578:        const char *temp = snippet;
                    579:        int i = 0;
                    580:        size_t sz = 0;
                    581:        int count = 0;
                    582:        struct orig_callback_data *orig_data = (struct orig_callback_data *) data;
                    583:        int (*callback) (void *, const char *, const char *, const char *,
                    584:                const char *, size_t) = orig_data->callback;
                    585:
                    586:        /* First scan the snippet to find out the number of occurrences of {'>', '<'
                    587:         * '"', '&'}.
                    588:         * Then allocate a new buffer with sufficient space to be able to store the
                    589:         * quoted versions of the special characters {&gt;, &lt;, &quot;, &amp;}.
                    590:         * Copy over the characters from the original snippet to this buffer while
                    591:         * replacing the special characters with their quoted versions.
                    592:         */
                    593:
                    594:        while (*temp) {
                    595:                sz = strcspn(temp, "<>\"&\002\003");
                    596:                temp += sz + 1;
                    597:                count++;
                    598:        }
                    599:        size_t qsnippet_length = snippet_length + count * 5;
                    600:        char *qsnippet = emalloc(qsnippet_length + 1);
                    601:        sz = 0;
                    602:        while (*snippet) {
                    603:                sz = strcspn(snippet, "<>\"&\002\003");
                    604:                if (sz) {
                    605:                        memcpy(&qsnippet[i], snippet, sz);
                    606:                        snippet += sz;
                    607:                        i += sz;
                    608:                }
                    609:
                    610:                switch (*snippet++) {
                    611:                case '<':
                    612:                        memcpy(&qsnippet[i], "&lt;", 4);
                    613:                        i += 4;
                    614:                        break;
                    615:                case '>':
                    616:                        memcpy(&qsnippet[i], "&gt;", 4);
                    617:                        i += 4;
                    618:                        break;
                    619:                case '\"':
                    620:                        memcpy(&qsnippet[i], "&quot;", 6);
                    621:                        i += 6;
                    622:                        break;
                    623:                case '&':
                    624:                        /* Don't perform the quoting if this & is part of an mdoc escape
                    625:                         * sequence, e.g. \&
                    626:                         */
                    627:                        if (i && *(snippet - 2) != '\\') {
                    628:                                memcpy(&qsnippet[i], "&amp;", 5);
                    629:                                i += 5;
                    630:                        } else {
                    631:                                qsnippet[i++] = '&';
                    632:                        }
                    633:                        break;
                    634:                case '\002':
                    635:                        memcpy(&qsnippet[i], "<b>", 3);
                    636:                        i += 3;
                    637:                        break;
                    638:                case '\003':
                    639:                        memcpy(&qsnippet[i], "</b>", 4);
                    640:                        i += 4;
                    641:                        break;
                    642:                default:
                    643:                        break;
                    644:                }
                    645:        }
                    646:        qsnippet[++i] = 0;
                    647:        (*callback)(orig_data->data, section, name, name_desc,
                    648:                (const char *)qsnippet, qsnippet_length);
                    649:        free(qsnippet);
                    650:        return 0;
                    651: }
                    652:
                    653: /*
                    654:  * run_query_html --
                    655:  *  Utility function to output query result in HTML format.
                    656:  *  It internally calls run_query only, but it first passes the output to it's
                    657:  *  own custom callback function, which preprocess the snippet for quoting
                    658:  *  inline HTML fragments.
                    659:  *  After that it delegates the call the actual user supplied callback function.
                    660:  */
                    661: int
                    662: run_query_html(sqlite3 *db, query_args *args)
                    663: {
                    664:        struct orig_callback_data orig_data;
                    665:        orig_data.callback = args->callback;
                    666:        orig_data.data = args->callback_data;
                    667:        const char *snippet_args[] = {"\002", "\003", "..."};
                    668:        args->callback = &callback_html;
                    669:        args->callback_data = (void *) &orig_data;
                    670:        return run_query(db, snippet_args, args);
                    671: }
                    672:
                    673: /*
                    674:  * callback_pager --
                    675:  *  A callback similar to callback_html. It overstrikes the matching text in
                    676:  *  the snippet so that it appears emboldened when viewed using a pager like
                    677:  *  more or less.
                    678:  */
                    679: static int
                    680: callback_pager(void *data, const char *section, const char *name,
                    681:        const char *name_desc, const char *snippet, size_t snippet_length)
                    682: {
                    683:        struct orig_callback_data *orig_data = (struct orig_callback_data *) data;
                    684:        char *psnippet;
                    685:        const char *temp = snippet;
                    686:        int count = 0;
                    687:        int i = 0;
                    688:        size_t sz = 0;
                    689:        size_t psnippet_length;
                    690:
                    691:        /* Count the number of bytes of matching text. For each of these bytes we
                    692:         * will use 2 extra bytes to overstrike it so that it appears bold when
                    693:         * viewed using a pager.
                    694:         */
                    695:        while (*temp) {
                    696:                sz = strcspn(temp, "\002\003");
                    697:                temp += sz;
                    698:                if (*temp == '\003') {
                    699:                        count += 2 * (sz);
                    700:                }
                    701:                temp++;
                    702:        }
                    703:
                    704:        psnippet_length = snippet_length + count;
                    705:        psnippet = emalloc(psnippet_length + 1);
                    706:
                    707:        /* Copy the bytes from snippet to psnippet:
                    708:         * 1. Copy the bytes before \002 as it is.
                    709:         * 2. The bytes after \002 need to be overstriked till we encounter \003.
                    710:         * 3. To overstrike a byte 'A' we need to write 'A\bA'
                    711:         */
                    712:        while (*snippet) {
                    713:                sz = strcspn(snippet, "\002");
                    714:                memcpy(&psnippet[i], snippet, sz);
                    715:                snippet += sz;
                    716:                i += sz;
                    717:
                    718:                /* Don't change this. Advancing the pointer without reading the byte
                    719:                 * is causing strange behavior.
                    720:                 */
                    721:                if (*snippet == '\002')
                    722:                        snippet++;
                    723:                while (*snippet && *snippet != '\003') {
                    724:                        psnippet[i++] = *snippet;
                    725:                        psnippet[i++] = '\b';
                    726:                        psnippet[i++] = *snippet++;
                    727:                }
                    728:                if (*snippet)
                    729:                        snippet++;
                    730:        }
                    731:
                    732:        psnippet[i] = 0;
                    733:        (orig_data->callback)(orig_data->data, section, name, name_desc, psnippet,
                    734:                psnippet_length);
                    735:        free(psnippet);
                    736:        return 0;
                    737: }
                    738:
                    739: /*
                    740:  * run_query_pager --
                    741:  *  Utility function similar to run_query_html. This function tries to
                    742:  *  pre-process the result assuming it will be piped to a pager.
                    743:  *  For this purpose it first calls it's own callback function callback_pager
                    744:  *  which then delegates the call to the user supplied callback.
                    745:  */
                    746: int run_query_pager(sqlite3 *db, query_args *args)
                    747: {
                    748:        struct orig_callback_data orig_data;
                    749:        orig_data.callback = args->callback;
                    750:        orig_data.data = args->callback_data;
                    751:        const char *snippet_args[] = {"\002", "\003", "..."};
                    752:        args->callback = &callback_pager;
                    753:        args->callback_data = (void *) &orig_data;
                    754:        return run_query(db, snippet_args, args);
                    755: }

CVSweb <webmaster@jp.NetBSD.org>