[BACK]Return to apropos-utils.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / usr.sbin / makemandb

Annotation of src/usr.sbin/makemandb/apropos-utils.c, Revision 1.4.2.5

1.4.2.5 ! yamt        1: /*     $NetBSD: apropos-utils.c,v 1.4.2.4 2012/10/30 19:00:37 yamt Exp $       */
1.4.2.2   yamt        2: /*-
                      3:  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
                      4:  * All rights reserved.
                      5:  *
                      6:  * This code was developed as part of Google's Summer of Code 2011 program.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  *
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in
                     16:  *    the documentation and/or other materials provided with the
                     17:  *    distribution.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
                     20:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
                     21:  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
                     22:  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
                     23:  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
                     24:  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
                     25:  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
                     26:  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
                     27:  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
                     28:  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
                     29:  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     30:  * SUCH DAMAGE.
                     31:  */
                     32:
                     33: #include <sys/cdefs.h>
1.4.2.5 ! yamt       34: __RCSID("$NetBSD: apropos-utils.c,v 1.4.2.4 2012/10/30 19:00:37 yamt Exp $");
1.4.2.2   yamt       35:
1.4.2.4   yamt       36: #include <sys/queue.h>
1.4.2.2   yamt       37: #include <sys/stat.h>
                     38:
                     39: #include <assert.h>
                     40: #include <ctype.h>
                     41: #include <err.h>
                     42: #include <math.h>
                     43: #include <stdio.h>
                     44: #include <stdlib.h>
                     45: #include <string.h>
                     46: #include <util.h>
                     47: #include <zlib.h>
1.4.2.5 ! yamt       48: #include <term.h>
        !            49: #undef tab     // XXX: manconf.h
1.4.2.2   yamt       50:
                     51: #include "apropos-utils.h"
1.4.2.4   yamt       52: #include "manconf.h"
1.4.2.5 ! yamt       53: #include "dist/mandoc.h"
1.4.2.2   yamt       54: #include "sqlite3.h"
                     55:
                     56: typedef struct orig_callback_data {
                     57:        void *data;
                     58:        int (*callback) (void *, const char *, const char *, const char *,
                     59:                const char *, size_t);
                     60: } orig_callback_data;
                     61:
                     62: typedef struct inverse_document_frequency {
                     63:        double value;
                     64:        int status;
                     65: } inverse_document_frequency;
                     66:
                     67: /* weights for individual columns */
                     68: static const double col_weights[] = {
                     69:        2.0,    // NAME
                     70:        2.00,   // Name-description
                     71:        0.55,   // DESCRIPTION
                     72:        0.10,   // LIBRARY
                     73:        0.001,  //RETURN VALUES
                     74:        0.20,   //ENVIRONMENT
                     75:        0.01,   //FILES
                     76:        0.001,  //EXIT STATUS
                     77:        2.00,   //DIAGNOSTICS
                     78:        0.05,   //ERRORS
                     79:        0.00,   //md5_hash
                     80:        1.00    //machine
                     81: };
                     82:
                     83: /*
                     84:  * lower --
                     85:  *  Converts the string str to lower case
                     86:  */
                     87: char *
                     88: lower(char *str)
                     89: {
                     90:        assert(str);
                     91:        int i = 0;
                     92:        char c;
                     93:        while (str[i] != '\0') {
                     94:                c = tolower((unsigned char) str[i]);
                     95:                str[i++] = c;
                     96:        }
                     97:        return str;
                     98: }
                     99:
                    100: /*
                    101: * concat--
                    102: *  Utility function. Concatenates together: dst, a space character and src.
                    103: * dst + " " + src
                    104: */
                    105: void
                    106: concat(char **dst, const char *src)
                    107: {
                    108:        concat2(dst, src, strlen(src));
                    109: }
                    110:
                    111: void
                    112: concat2(char **dst, const char *src, size_t srclen)
                    113: {
                    114:        size_t total_len, dst_len;
                    115:        assert(src != NULL);
                    116:
                    117:        /* If destination buffer dst is NULL, then simply strdup the source buffer */
                    118:        if (*dst == NULL) {
                    119:                *dst = estrdup(src);
                    120:                return;
                    121:        }
                    122:
                    123:        dst_len = strlen(*dst);
                    124:        /*
                    125:         * NUL Byte and separator space
                    126:         */
                    127:        total_len = dst_len + srclen + 2;
                    128:
                    129:        *dst = erealloc(*dst, total_len);
                    130:
                    131:        /* Append a space at the end of dst */
                    132:        (*dst)[dst_len++] = ' ';
                    133:
                    134:        /* Now, copy src at the end of dst */
                    135:        memcpy(*dst + dst_len, src, srclen + 1);
                    136: }
                    137:
                    138: void
                    139: close_db(sqlite3 *db)
                    140: {
                    141:        sqlite3_close(db);
                    142:        sqlite3_shutdown();
                    143: }
                    144:
                    145: /*
                    146:  * create_db --
                    147:  *  Creates the database schema.
                    148:  */
                    149: static int
                    150: create_db(sqlite3 *db)
                    151: {
                    152:        const char *sqlstr = NULL;
                    153:        char *schemasql;
                    154:        char *errmsg = NULL;
                    155:
                    156: /*------------------------ Create the tables------------------------------*/
                    157:
                    158: #if NOTYET
                    159:        sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL);
                    160: #else
                    161:        sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL);
                    162: #endif
                    163:
                    164:        schemasql = sqlite3_mprintf("PRAGMA user_version = %d",
                    165:            APROPOS_SCHEMA_VERSION);
                    166:        sqlite3_exec(db, schemasql, NULL, NULL, &errmsg);
                    167:        if (errmsg != NULL)
                    168:                goto out;
                    169:        sqlite3_free(schemasql);
                    170:
                    171:        sqlstr = "CREATE VIRTUAL TABLE mandb USING fts4(section, name, "
                    172:                            "name_desc, desc, lib, return_vals, env, files, "
                    173:                            "exit_status, diagnostics, errors, md5_hash UNIQUE, machine, "
                    174:                            "compress=zip, uncompress=unzip, tokenize=porter); "        //mandb
                    175:                        "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, "
                    176:                            "file UNIQUE, md5_hash UNIQUE, id  INTEGER PRIMARY KEY); "
                    177:                                //mandb_meta
                    178:                        "CREATE TABLE IF NOT EXISTS mandb_links(link, target, section, "
1.4.2.3   yamt      179:                            "machine, md5_hash); ";     //mandb_links
1.4.2.2   yamt      180:
                    181:        sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
                    182:        if (errmsg != NULL)
                    183:                goto out;
                    184:
                    185:        sqlstr = "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links "
                    186:                        "(link); "
                    187:                        "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta "
1.4.2.3   yamt      188:                        "(device, inode); "
                    189:                        "CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links "
                    190:                        "(md5_hash);";
1.4.2.2   yamt      191:        sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
                    192:        if (errmsg != NULL)
                    193:                goto out;
                    194:        return 0;
                    195:
                    196: out:
                    197:        warnx("%s", errmsg);
                    198:        free(errmsg);
                    199:        sqlite3_close(db);
                    200:        sqlite3_shutdown();
                    201:        return -1;
                    202: }
                    203:
                    204: /*
                    205:  * zip --
                    206:  *  User defined Sqlite function to compress the FTS table
                    207:  */
                    208: static void
                    209: zip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
                    210: {
                    211:        int nin;
                    212:        long int nout;
                    213:        const unsigned char * inbuf;
                    214:        unsigned char *outbuf;
                    215:
                    216:        assert(nval == 1);
                    217:        nin = sqlite3_value_bytes(apval[0]);
                    218:        inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]);
                    219:        nout = nin + 13 + (nin + 999) / 1000;
                    220:        outbuf = emalloc(nout);
                    221:        compress(outbuf, (unsigned long *) &nout, inbuf, nin);
                    222:        sqlite3_result_blob(pctx, outbuf, nout, free);
                    223: }
                    224:
                    225: /*
                    226:  * unzip --
                    227:  *  User defined Sqlite function to uncompress the FTS table.
                    228:  */
                    229: static void
                    230: unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
                    231: {
                    232:        unsigned int rc;
                    233:        unsigned char *outbuf;
                    234:        z_stream stream;
                    235:
                    236:        assert(nval == 1);
                    237:        stream.next_in = __UNCONST(sqlite3_value_blob(apval[0]));
                    238:        stream.avail_in = sqlite3_value_bytes(apval[0]);
                    239:        stream.avail_out = stream.avail_in * 2 + 100;
                    240:        stream.next_out = outbuf = emalloc(stream.avail_out);
                    241:        stream.zalloc = NULL;
                    242:        stream.zfree = NULL;
                    243:
                    244:        if (inflateInit(&stream) != Z_OK) {
                    245:                free(outbuf);
                    246:                return;
                    247:        }
                    248:
                    249:        while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) {
                    250:                if (rc != Z_OK ||
                    251:                    (stream.avail_out != 0 && stream.avail_in == 0)) {
                    252:                        free(outbuf);
                    253:                        return;
                    254:                }
                    255:                outbuf = erealloc(outbuf, stream.total_out * 2);
                    256:                stream.next_out = outbuf + stream.total_out;
                    257:                stream.avail_out = stream.total_out;
                    258:        }
                    259:        if (inflateEnd(&stream) != Z_OK) {
                    260:                free(outbuf);
                    261:                return;
                    262:        }
                    263:        outbuf = erealloc(outbuf, stream.total_out);
                    264:        sqlite3_result_text(pctx, (const char *) outbuf, stream.total_out, free);
                    265: }
                    266:
1.4.2.4   yamt      267: /*
                    268:  * get_dbpath --
                    269:  *   Read the path of the database from man.conf and return.
                    270:  */
                    271: char *
                    272: get_dbpath(const char *manconf)
                    273: {
                    274:        TAG *tp;
                    275:        char *dbpath;
                    276:
                    277:        config(manconf);
                    278:        tp = gettag("_mandb", 1);
                    279:        if (!tp)
                    280:                return NULL;
                    281:
                    282:        if (TAILQ_EMPTY(&tp->entrylist))
                    283:                return NULL;
                    284:
                    285:        dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s;
                    286:        return dbpath;
                    287: }
                    288:
1.4.2.2   yamt      289: /* init_db --
                    290:  *   Prepare the database. Register the compress/uncompress functions and the
                    291:  *   stopword tokenizer.
                    292:  *      db_flag specifies the mode in which to open the database. 3 options are
                    293:  *   available:
                    294:  *     1. DB_READONLY: Open in READONLY mode. An error if db does not exist.
                    295:  *     2. DB_READWRITE: Open in read-write mode. An error if db does not exist.
                    296:  *     3. DB_CREATE: Open in read-write mode. It will try to create the db if
                    297:  *                     it does not exist already.
                    298:  *  RETURN VALUES:
                    299:  *             The function will return NULL in case the db does not exist and DB_CREATE
                    300:  *     was not specified. And in case DB_CREATE was specified and yet NULL is
                    301:  *     returned, then there was some other error.
                    302:  *     In normal cases the function should return a handle to the db.
                    303:  */
                    304: sqlite3 *
1.4.2.4   yamt      305: init_db(int db_flag, const char *manconf)
1.4.2.2   yamt      306: {
                    307:        sqlite3 *db = NULL;
                    308:        sqlite3_stmt *stmt;
                    309:        struct stat sb;
                    310:        int rc;
                    311:        int create_db_flag = 0;
                    312:
1.4.2.4   yamt      313:        char *dbpath = get_dbpath(manconf);
                    314:        if (dbpath == NULL)
                    315:                errx(EXIT_FAILURE, "_mandb entry not found in man.conf");
1.4.2.2   yamt      316:        /* Check if the database exists or not */
1.4.2.4   yamt      317:        if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) {
1.4.2.2   yamt      318:                /* Database does not exist, check if DB_CREATE was specified, and set
                    319:                 * flag to create the database schema
                    320:                 */
                    321:                if (db_flag != (MANDB_CREATE)) {
                    322:                        warnx("Missing apropos database. "
                    323:                              "Please run makemandb to create it.");
                    324:                        return NULL;
                    325:                }
                    326:                create_db_flag = 1;
                    327:        }
                    328:
                    329:        /* Now initialize the database connection */
                    330:        sqlite3_initialize();
1.4.2.4   yamt      331:        rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL);
1.4.2.2   yamt      332:
                    333:        if (rc != SQLITE_OK) {
                    334:                warnx("%s", sqlite3_errmsg(db));
                    335:                sqlite3_shutdown();
                    336:                return NULL;
                    337:        }
                    338:
                    339:        if (create_db_flag && create_db(db) < 0) {
                    340:                warnx("%s", "Unable to create database schema");
                    341:                goto error;
                    342:        }
                    343:
                    344:        rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL);
                    345:        if (rc != SQLITE_OK) {
                    346:                warnx("Unable to query schema version: %s",
                    347:                    sqlite3_errmsg(db));
                    348:                goto error;
                    349:        }
                    350:        if (sqlite3_step(stmt) != SQLITE_ROW) {
                    351:                sqlite3_finalize(stmt);
                    352:                warnx("Unable to query schema version: %s",
                    353:                    sqlite3_errmsg(db));
                    354:                goto error;
                    355:        }
                    356:        if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) {
                    357:                sqlite3_finalize(stmt);
                    358:                warnx("Incorrect schema version found. "
                    359:                      "Please run makemandb -f.");
                    360:                goto error;
                    361:        }
                    362:        sqlite3_finalize(stmt);
                    363:
                    364:        sqlite3_extended_result_codes(db, 1);
                    365:
                    366:        /* Register the zip and unzip functions for FTS compression */
                    367:        rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip, NULL, NULL);
                    368:        if (rc != SQLITE_OK) {
                    369:                warnx("Unable to register function: compress: %s",
                    370:                    sqlite3_errmsg(db));
                    371:                goto error;
                    372:        }
                    373:
                    374:        rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL,
                    375:                                  unzip, NULL, NULL);
                    376:        if (rc != SQLITE_OK) {
                    377:                warnx("Unable to register function: uncompress: %s",
                    378:                    sqlite3_errmsg(db));
                    379:                goto error;
                    380:        }
                    381:        return db;
1.4.2.4   yamt      382:
1.4.2.2   yamt      383: error:
                    384:        sqlite3_close(db);
                    385:        sqlite3_shutdown();
                    386:        return NULL;
                    387: }
                    388:
                    389: /*
                    390:  * rank_func --
                    391:  *  Sqlite user defined function for ranking the documents.
                    392:  *  For each phrase of the query, it computes the tf and idf and adds them over.
                    393:  *  It computes the final rank, by multiplying tf and idf together.
                    394:  *  Weight of term t for document d = (term frequency of t in d *
                    395:  *                                      inverse document frequency of t)
                    396:  *
                    397:  *  Term Frequency of term t in document d = Number of times t occurs in d /
                    398:  *                                             Number of times t appears in all
                    399:  *                                                                                     documents
                    400:  *
                    401:  *  Inverse document frequency of t = log(Total number of documents /
                    402:  *                                                                             Number of documents in which t occurs)
                    403:  */
                    404: static void
                    405: rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval)
                    406: {
                    407:        inverse_document_frequency *idf = sqlite3_user_data(pctx);
                    408:        double tf = 0.0;
                    409:        const unsigned int *matchinfo;
                    410:        int ncol;
                    411:        int nphrase;
                    412:        int iphrase;
                    413:        int ndoc;
                    414:        int doclen = 0;
                    415:        const double k = 3.75;
                    416:        /* Check that the number of arguments passed to this function is correct. */
                    417:        assert(nval == 1);
                    418:
                    419:        matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]);
                    420:        nphrase = matchinfo[0];
                    421:        ncol = matchinfo[1];
                    422:        ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol];
                    423:        for (iphrase = 0; iphrase < nphrase; iphrase++) {
                    424:                int icol;
                    425:                const unsigned int *phraseinfo = &matchinfo[2 + ncol+ iphrase * ncol * 3];
                    426:                for(icol = 1; icol < ncol; icol++) {
                    427:
                    428:                        /* nhitcount: number of times the current phrase occurs in the current
                    429:                         *            column in the current document.
                    430:                         * nglobalhitcount: number of times current phrase occurs in the current
                    431:                         *                  column in all documents.
                    432:                         * ndocshitcount:   number of documents in which the current phrase
                    433:                         *                  occurs in the current column at least once.
                    434:                         */
                    435:                        int nhitcount = phraseinfo[3 * icol];
                    436:                        int nglobalhitcount = phraseinfo[3 * icol + 1];
                    437:                        int ndocshitcount = phraseinfo[3 * icol + 2];
                    438:                        doclen = matchinfo[2 + icol ];
                    439:                        double weight = col_weights[icol - 1];
                    440:                        if (idf->status == 0 && ndocshitcount)
                    441:                                idf->value += log(((double)ndoc / ndocshitcount))* weight;
                    442:
                    443:                        /* Dividing the tf by document length to normalize the effect of
                    444:                         * longer documents.
                    445:                         */
                    446:                        if (nglobalhitcount > 0 && nhitcount)
                    447:                                tf += (((double)nhitcount  * weight) / (nglobalhitcount * doclen));
                    448:                }
                    449:        }
                    450:        idf->status = 1;
                    451:
                    452:        /* Final score = (tf * idf)/ ( k + tf)
                    453:         *      Dividing by k+ tf further normalizes the weight leading to better
                    454:         *  results.
                    455:         *  The value of k is experimental
                    456:         */
                    457:        double score = (tf * idf->value/ ( k + tf)) ;
                    458:        sqlite3_result_double(pctx, score);
                    459:        return;
                    460: }
                    461:
                    462: /*
                    463:  *  run_query --
                    464:  *  Performs the searches for the keywords entered by the user.
                    465:  *  The 2nd param: snippet_args is an array of strings providing values for the
                    466:  *  last three parameters to the snippet function of sqlite. (Look at the docs).
                    467:  *  The 3rd param: args contains rest of the search parameters. Look at
                    468:  *  apropos-utils.h for the description of individual fields.
                    469:  *
                    470:  */
                    471: int
                    472: run_query(sqlite3 *db, const char *snippet_args[3], query_args *args)
                    473: {
                    474:        const char *default_snippet_args[3];
                    475:        char *section_clause = NULL;
                    476:        char *limit_clause = NULL;
                    477:        char *machine_clause = NULL;
                    478:        char *query;
                    479:        const char *section;
                    480:        char *name;
                    481:        const char *name_desc;
                    482:        const char *machine;
                    483:        const char *snippet;
                    484:        const char *name_temp;
                    485:        char *slash_ptr;
                    486:        char *m = NULL;
                    487:        int rc;
                    488:        inverse_document_frequency idf = {0, 0};
                    489:        sqlite3_stmt *stmt;
                    490:
                    491:        if (args->machine)
                    492:                easprintf(&machine_clause, "AND machine = \'%s\' ", args->machine);
                    493:
                    494:        /* Register the rank function */
                    495:        rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY, (void *)&idf,
                    496:                                     rank_func, NULL, NULL);
                    497:        if (rc != SQLITE_OK) {
                    498:                warnx("Unable to register the ranking function: %s",
                    499:                    sqlite3_errmsg(db));
                    500:                sqlite3_close(db);
                    501:                sqlite3_shutdown();
                    502:                exit(EXIT_FAILURE);
                    503:        }
                    504:
                    505:        /* We want to build a query of the form: "select x,y,z from mandb where
                    506:         * mandb match :query [AND (section LIKE '1' OR section LIKE '2' OR...)]
                    507:         * ORDER BY rank DESC..."
                    508:         * NOTES: 1. The portion in square brackets is optional, it will be there
                    509:         * only if the user has specified an option on the command line to search in
                    510:         * one or more specific sections.
                    511:         * 2. I am using LIKE operator because '=' or IN operators do not seem to be
                    512:         * working with the compression option enabled.
                    513:         */
                    514:
                    515:        if (args->sec_nums) {
                    516:                char *temp;
                    517:                int i;
                    518:
                    519:                for (i = 0; i < SECMAX; i++) {
                    520:                        if (args->sec_nums[i] == 0)
                    521:                                continue;
                    522:                        easprintf(&temp, " OR section = \'%d\'", i + 1);
                    523:                        if (section_clause) {
                    524:                                concat(&section_clause, temp);
                    525:                                free(temp);
                    526:                        } else {
                    527:                                section_clause = temp;
                    528:                        }
                    529:                }
                    530:                if (section_clause) {
                    531:                        /*
                    532:                         * At least one section requested, add glue for query.
                    533:                         */
                    534:                        temp = section_clause;
                    535:                        /* Skip " OR " before first term. */
                    536:                        easprintf(&section_clause, " AND (%s)", temp + 4);
                    537:                        free(temp);
                    538:                }
                    539:        }
                    540:        if (args->nrec >= 0) {
                    541:                /* Use the provided number of records and offset */
                    542:                easprintf(&limit_clause, " LIMIT %d OFFSET %d",
                    543:                    args->nrec, args->offset);
                    544:        }
                    545:
                    546:        if (snippet_args == NULL) {
                    547:                default_snippet_args[0] = "";
                    548:                default_snippet_args[1] = "";
                    549:                default_snippet_args[2] = "...";
                    550:                snippet_args = default_snippet_args;
                    551:        }
                    552:        query = sqlite3_mprintf("SELECT section, name, name_desc, machine,"
                    553:            " snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
                    554:            " rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
                    555:            " FROM mandb"
                    556:            " WHERE mandb MATCH %Q %s "
                    557:            "%s"
                    558:            " ORDER BY rank DESC"
                    559:            "%s",
                    560:            snippet_args[0], snippet_args[1], snippet_args[2], args->search_str,
                    561:            machine_clause ? machine_clause : "",
                    562:            section_clause ? section_clause : "",
                    563:            limit_clause ? limit_clause : "");
                    564:
                    565:        free(machine_clause);
                    566:        free(section_clause);
                    567:        free(limit_clause);
                    568:
                    569:        if (query == NULL) {
                    570:                *args->errmsg = estrdup("malloc failed");
                    571:                return -1;
                    572:        }
                    573:        rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
                    574:        if (rc == SQLITE_IOERR) {
                    575:                warnx("Corrupt database. Please rerun makemandb");
                    576:                sqlite3_free(query);
                    577:                return -1;
                    578:        } else if (rc != SQLITE_OK) {
                    579:                warnx("%s", sqlite3_errmsg(db));
                    580:                sqlite3_free(query);
                    581:                return -1;
                    582:        }
                    583:
                    584:        while (sqlite3_step(stmt) == SQLITE_ROW) {
                    585:                section = (const char *) sqlite3_column_text(stmt, 0);
                    586:                name_temp = (const char *) sqlite3_column_text(stmt, 1);
                    587:                name_desc = (const char *) sqlite3_column_text(stmt, 2);
                    588:                machine = (const char *) sqlite3_column_text(stmt, 3);
                    589:                snippet = (const char *) sqlite3_column_text(stmt, 4);
                    590:                if ((slash_ptr = strrchr(name_temp, '/')) != NULL)
                    591:                        name_temp = slash_ptr + 1;
                    592:                if (machine && machine[0]) {
                    593:                        m = estrdup(machine);
                    594:                        easprintf(&name, "%s/%s", lower(m),
                    595:                                name_temp);
                    596:                        free(m);
                    597:                } else {
                    598:                        name = estrdup((const char *) sqlite3_column_text(stmt, 1));
                    599:                }
                    600:
                    601:                (args->callback)(args->callback_data, section, name, name_desc, snippet,
                    602:                        strlen(snippet));
                    603:
                    604:                free(name);
                    605:        }
                    606:
                    607:        sqlite3_finalize(stmt);
                    608:        sqlite3_free(query);
                    609:        return *(args->errmsg) == NULL ? 0 : -1;
                    610: }
                    611:
                    612: /*
                    613:  * callback_html --
                    614:  *  Callback function for run_query_html. It builds the html output and then
                    615:  *  calls the actual user supplied callback function.
                    616:  */
                    617: static int
                    618: callback_html(void *data, const char *section, const char *name,
                    619:        const char *name_desc, const char *snippet, size_t snippet_length)
                    620: {
                    621:        const char *temp = snippet;
                    622:        int i = 0;
                    623:        size_t sz = 0;
                    624:        int count = 0;
                    625:        struct orig_callback_data *orig_data = (struct orig_callback_data *) data;
                    626:        int (*callback) (void *, const char *, const char *, const char *,
                    627:                const char *, size_t) = orig_data->callback;
                    628:
                    629:        /* First scan the snippet to find out the number of occurrences of {'>', '<'
                    630:         * '"', '&'}.
                    631:         * Then allocate a new buffer with sufficient space to be able to store the
                    632:         * quoted versions of the special characters {&gt;, &lt;, &quot;, &amp;}.
                    633:         * Copy over the characters from the original snippet to this buffer while
                    634:         * replacing the special characters with their quoted versions.
                    635:         */
                    636:
                    637:        while (*temp) {
                    638:                sz = strcspn(temp, "<>\"&\002\003");
                    639:                temp += sz + 1;
                    640:                count++;
                    641:        }
                    642:        size_t qsnippet_length = snippet_length + count * 5;
                    643:        char *qsnippet = emalloc(qsnippet_length + 1);
                    644:        sz = 0;
                    645:        while (*snippet) {
                    646:                sz = strcspn(snippet, "<>\"&\002\003");
                    647:                if (sz) {
                    648:                        memcpy(&qsnippet[i], snippet, sz);
                    649:                        snippet += sz;
                    650:                        i += sz;
                    651:                }
                    652:
                    653:                switch (*snippet++) {
                    654:                case '<':
                    655:                        memcpy(&qsnippet[i], "&lt;", 4);
                    656:                        i += 4;
                    657:                        break;
                    658:                case '>':
                    659:                        memcpy(&qsnippet[i], "&gt;", 4);
                    660:                        i += 4;
                    661:                        break;
                    662:                case '\"':
                    663:                        memcpy(&qsnippet[i], "&quot;", 6);
                    664:                        i += 6;
                    665:                        break;
                    666:                case '&':
                    667:                        /* Don't perform the quoting if this & is part of an mdoc escape
                    668:                         * sequence, e.g. \&
                    669:                         */
                    670:                        if (i && *(snippet - 2) != '\\') {
                    671:                                memcpy(&qsnippet[i], "&amp;", 5);
                    672:                                i += 5;
                    673:                        } else {
                    674:                                qsnippet[i++] = '&';
                    675:                        }
                    676:                        break;
                    677:                case '\002':
                    678:                        memcpy(&qsnippet[i], "<b>", 3);
                    679:                        i += 3;
                    680:                        break;
                    681:                case '\003':
                    682:                        memcpy(&qsnippet[i], "</b>", 4);
                    683:                        i += 4;
                    684:                        break;
                    685:                default:
                    686:                        break;
                    687:                }
                    688:        }
                    689:        qsnippet[++i] = 0;
                    690:        (*callback)(orig_data->data, section, name, name_desc,
                    691:                (const char *)qsnippet, qsnippet_length);
                    692:        free(qsnippet);
                    693:        return 0;
                    694: }
                    695:
                    696: /*
                    697:  * run_query_html --
                    698:  *  Utility function to output query result in HTML format.
                    699:  *  It internally calls run_query only, but it first passes the output to it's
                    700:  *  own custom callback function, which preprocess the snippet for quoting
                    701:  *  inline HTML fragments.
                    702:  *  After that it delegates the call the actual user supplied callback function.
                    703:  */
                    704: int
                    705: run_query_html(sqlite3 *db, query_args *args)
                    706: {
                    707:        struct orig_callback_data orig_data;
                    708:        orig_data.callback = args->callback;
                    709:        orig_data.data = args->callback_data;
                    710:        const char *snippet_args[] = {"\002", "\003", "..."};
                    711:        args->callback = &callback_html;
                    712:        args->callback_data = (void *) &orig_data;
                    713:        return run_query(db, snippet_args, args);
                    714: }
                    715:
                    716: /*
1.4.2.5 ! yamt      717:  * underline a string, pager style.
        !           718:  */
        !           719: static char *
        !           720: ul_pager(const char *s)
        !           721: {
        !           722:        size_t len;
        !           723:        char *dst, *d;
        !           724:
        !           725:        // a -> _\ba
        !           726:        len = strlen(s) * 3 + 1;
        !           727:
        !           728:        d = dst = emalloc(len);
        !           729:        while (*s) {
        !           730:                *d++ = '_';
        !           731:                *d++ = '\b';
        !           732:                *d++ = *s++;
        !           733:        }
        !           734:        *d = '\0';
        !           735:        return dst;
        !           736: }
        !           737:
        !           738: /*
1.4.2.2   yamt      739:  * callback_pager --
                    740:  *  A callback similar to callback_html. It overstrikes the matching text in
                    741:  *  the snippet so that it appears emboldened when viewed using a pager like
                    742:  *  more or less.
                    743:  */
                    744: static int
                    745: callback_pager(void *data, const char *section, const char *name,
                    746:        const char *name_desc, const char *snippet, size_t snippet_length)
                    747: {
                    748:        struct orig_callback_data *orig_data = (struct orig_callback_data *) data;
                    749:        char *psnippet;
                    750:        const char *temp = snippet;
                    751:        int count = 0;
                    752:        int i = 0;
                    753:        size_t sz = 0;
                    754:        size_t psnippet_length;
                    755:
                    756:        /* Count the number of bytes of matching text. For each of these bytes we
                    757:         * will use 2 extra bytes to overstrike it so that it appears bold when
                    758:         * viewed using a pager.
                    759:         */
                    760:        while (*temp) {
                    761:                sz = strcspn(temp, "\002\003");
                    762:                temp += sz;
                    763:                if (*temp == '\003') {
                    764:                        count += 2 * (sz);
                    765:                }
                    766:                temp++;
                    767:        }
                    768:
                    769:        psnippet_length = snippet_length + count;
                    770:        psnippet = emalloc(psnippet_length + 1);
                    771:
                    772:        /* Copy the bytes from snippet to psnippet:
                    773:         * 1. Copy the bytes before \002 as it is.
                    774:         * 2. The bytes after \002 need to be overstriked till we encounter \003.
                    775:         * 3. To overstrike a byte 'A' we need to write 'A\bA'
                    776:         */
                    777:        while (*snippet) {
                    778:                sz = strcspn(snippet, "\002");
                    779:                memcpy(&psnippet[i], snippet, sz);
                    780:                snippet += sz;
                    781:                i += sz;
                    782:
                    783:                /* Don't change this. Advancing the pointer without reading the byte
                    784:                 * is causing strange behavior.
                    785:                 */
                    786:                if (*snippet == '\002')
                    787:                        snippet++;
                    788:                while (*snippet && *snippet != '\003') {
                    789:                        psnippet[i++] = *snippet;
                    790:                        psnippet[i++] = '\b';
                    791:                        psnippet[i++] = *snippet++;
                    792:                }
                    793:                if (*snippet)
                    794:                        snippet++;
                    795:        }
                    796:
                    797:        psnippet[i] = 0;
1.4.2.5 ! yamt      798:        char *ul_section = ul_pager(section);
        !           799:        char *ul_name = ul_pager(name);
        !           800:        char *ul_name_desc = ul_pager(name_desc);
        !           801:        (orig_data->callback)(orig_data->data, ul_section, ul_name,
        !           802:            ul_name_desc, psnippet, psnippet_length);
        !           803:        free(ul_section);
        !           804:        free(ul_name);
        !           805:        free(ul_name_desc);
1.4.2.2   yamt      806:        free(psnippet);
                    807:        return 0;
                    808: }
                    809:
/*
 * Callback data handed to callback_term by run_query_term.
 */
struct term_args {
	/* The user's original callback and its data, to be chained to. */
	struct orig_callback_data *orig_data;
	/* String emitted before an underlined field. */
	const char *smul;
	/* String emitted after an underlined field. */
	const char *rmul;
};
        !           815:
        !           816: /*
        !           817:  * underline a string, pager style.
        !           818:  */
        !           819: static char *
        !           820: ul_term(const char *s, const struct term_args *ta)
        !           821: {
        !           822:        char *dst;
        !           823:
        !           824:        easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul);
        !           825:        return dst;
        !           826: }
        !           827:
        !           828: /*
        !           829:  * callback_term --
        !           830:  *  A callback similar to callback_html. It overstrikes the matching text in
        !           831:  *  the snippet so that it appears emboldened when viewed using a pager like
        !           832:  *  more or less.
        !           833:  */
        !           834: static int
        !           835: callback_term(void *data, const char *section, const char *name,
        !           836:        const char *name_desc, const char *snippet, size_t snippet_length)
        !           837: {
        !           838:        struct term_args *ta = data;
        !           839:        struct orig_callback_data *orig_data = ta->orig_data;
        !           840:
        !           841:        char *ul_section = ul_term(section, ta);
        !           842:        char *ul_name = ul_term(name, ta);
        !           843:        char *ul_name_desc = ul_term(name_desc, ta);
        !           844:        (orig_data->callback)(orig_data->data, ul_section, ul_name,
        !           845:            ul_name_desc, snippet, snippet_length);
        !           846:        free(ul_section);
        !           847:        free(ul_name);
        !           848:        free(ul_name_desc);
        !           849:        return 0;
        !           850: }
        !           851:
1.4.2.2   yamt      852: /*
                    853:  * run_query_pager --
                    854:  *  Utility function similar to run_query_html. This function tries to
                    855:  *  pre-process the result assuming it will be piped to a pager.
                    856:  *  For this purpose it first calls it's own callback function callback_pager
                    857:  *  which then delegates the call to the user supplied callback.
                    858:  */
1.4.2.3   yamt      859: int
                    860: run_query_pager(sqlite3 *db, query_args *args)
1.4.2.2   yamt      861: {
                    862:        struct orig_callback_data orig_data;
                    863:        orig_data.callback = args->callback;
                    864:        orig_data.data = args->callback_data;
                    865:        const char *snippet_args[] = {"\002", "\003", "..."};
                    866:        args->callback = &callback_pager;
                    867:        args->callback_data = (void *) &orig_data;
                    868:        return run_query(db, snippet_args, args);
                    869: }
1.4.2.5 ! yamt      870:
        !           871: static void
        !           872: term_init(int fd, const char *sa[5])
        !           873: {
        !           874:        TERMINAL *ti;
        !           875:        int error;
        !           876:        const char *bold, *sgr0, *smso, *rmso, *smul, *rmul;
        !           877:
        !           878:        if (ti_setupterm(&ti, NULL, fd, &error) == -1) {
        !           879:                bold = sgr0 = NULL;
        !           880:                smso = rmso = smul = rmul = "";
        !           881:                ti = NULL;
        !           882:        } else {
        !           883:                bold = ti_getstr(ti, "bold");
        !           884:                sgr0 = ti_getstr(ti, "sgr0");
        !           885:                if (bold == NULL || sgr0 == NULL) {
        !           886:                        smso = ti_getstr(ti, "smso");
        !           887:
        !           888:                        if (smso == NULL ||
        !           889:                            (rmso = ti_getstr(ti, "rmso")) == NULL)
        !           890:                                smso = rmso = "";
        !           891:                        bold = sgr0 = NULL;
        !           892:                } else
        !           893:                        smso = rmso = "";
        !           894:
        !           895:                smul = ti_getstr(ti, "smul");
        !           896:                if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL)
        !           897:                        smul = rmul = "";
        !           898:        }
        !           899:
        !           900:        sa[0] = estrdup(bold ? bold : smso);
        !           901:        sa[1] = estrdup(sgr0 ? sgr0 : rmso);
        !           902:        sa[2] = estrdup("...");
        !           903:        sa[3] = estrdup(smul);
        !           904:        sa[4] = estrdup(rmul);
        !           905:        if (ti)
        !           906:                del_curterm(ti);
        !           907: }
        !           908:
        !           909: /*
        !           910:  * run_query_term --
        !           911:  *  Utility function similar to run_query_html. This function tries to
        !           912:  *  pre-process the result assuming it will be displayed on a terminal
        !           913:  *  For this purpose it first calls it's own callback function callback_pager
        !           914:  *  which then delegates the call to the user supplied callback.
        !           915:  */
        !           916: int
        !           917: run_query_term(sqlite3 *db, query_args *args)
        !           918: {
        !           919:        struct orig_callback_data orig_data;
        !           920:        struct term_args ta;
        !           921:        orig_data.callback = args->callback;
        !           922:        orig_data.data = args->callback_data;
        !           923:        const char *snippet_args[5];
        !           924:        term_init(STDOUT_FILENO, snippet_args);
        !           925:        ta.smul = snippet_args[3];
        !           926:        ta.rmul = snippet_args[4];
        !           927:        ta.orig_data = (void *) &orig_data;
        !           928:
        !           929:        args->callback = &callback_term;
        !           930:        args->callback_data = &ta;
        !           931:        return run_query(db, snippet_args, args);
        !           932: }

CVSweb <webmaster@jp.NetBSD.org>