侧边栏壁纸
博主头像
落叶人生博主等级

走进秋风,寻找秋天的落叶

  • 累计撰写 130562 篇文章
  • 累计创建 28 个标签
  • 累计收到 9 条评论
标签搜索

目 录CONTENT

文章目录

我封装的全文检索之solr篇

2022-06-21 星期二 / 0 评论 / 0 点赞 / 91 阅读 / 13880 字

折腾了好几天,终于把东西都搬到新住处了,累死我了.现在是光着膀子坐在电脑前码字. 前几天发表了一篇文章,写的是关于lucene(见文章我封装的全文检索之lucene篇),对于这篇文章大家什么看法都

    折腾了好几天,终于把东西都搬到新住处了,累死我了.现在是光着膀子坐在电脑前码字.
    前几天发表了一篇文章,写的是关于lucene(见文章我封装的全文检索之lucene篇),对于这篇文章大家什么看法都有,有好有坏,不管好坏,都谢谢大家,我会继续努力写下去的,我也会参考你们的建议去修改一下,争取写出更好的!
    今天准备写的是关于solr的,solr相信大家有的已经很熟悉了,具体是什么玩意,怎么用啊,我就不写了.浪费oschina服务器硬盘空间.我就写写,我封装的这套所谓的框架(好多人都说仅仅只是一个对索引的创建,更新,删除以及查询的几个操作而已,不过确实是这样的.名字起的有点大了.)
    啥也不说,先浪费点oschina的硬盘再说(贴代码):
    

package com.message.base.search.engine;

import com.message.base.pagination.PaginationSupport;
import com.message.base.pagination.PaginationUtils;
import com.message.base.search.SearchBean;
import com.message.base.search.SearchInitException;
import com.message.base.utils.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeanUtils;

import java.net.MalformedURLException;
import java.util.*;

/**
 * Search engine implementation backed by a Solr server, accessed through SolrJ.
 *
 * <p>Every {@link SearchBean} is stored as one Solr document. A fixed set of fields
 * ({@code id}, {@code pkId}, {@code keyword}, {@code owerId}, {@code owerName},
 * {@code link}, {@code createDate}, {@code indexType}) is written for every bean, and each
 * name returned by {@code getDoIndexFields()} is written as {@code <name>_message}.
 *
 * <p>The index-mutating methods are {@code synchronized} on this instance; {@code doSearch}
 * is not.
 *
 * @author sunhao
 * @version V1.0
 * @createTime 13-5-5 9:36 PM
 */
public class SolrSearchEngine extends AbstractSearchEngine {
    private static final Logger logger = LoggerFactory.getLogger(SolrSearchEngine.class);

    /** Suffix appended to bean field names to form the Solr field name. */
    private static final String DYNAMIC_FIELD_SUFFIX = "_message";

    /** Row count requested from Solr when the caller passes {@code -1} as {@code num}. */
    private static final int UNBOUNDED_ROWS = 100000000;

    /** Base URL of the Solr server; may be replaced through {@link #setServer(String)}. */
    private String server = "http://localhost:8080/solr";

    /**
     * Lazily created client for {@link #server}. It is reused across calls because creating a
     * new {@code CommonsHttpSolrServer} per request allocates a new HTTP client each time.
     * Reset to {@code null} whenever the server URL changes.
     */
    private SolrServer cachedSolrServer;

    /**
     * Returns the SolrJ client for the configured server URL, creating it on first use.
     *
     * @return the shared client instance
     * @throws SearchInitException if the server URL is empty or malformed
     */
    private synchronized SolrServer getSolrServer() {
        if (StringUtils.isEmpty(server)) {
            logger.error("null solr server path!");
            throw new SearchInitException("Give a null solr server path");
        }
        if (cachedSolrServer == null) {
            try {
                cachedSolrServer = new CommonsHttpSolrServer(server);
            } catch (MalformedURLException e) {
                // The exception thrown below carries no cause, so log the original here.
                logger.error("Connect to solr server error use server '" + server + "'", e);
                throw new SearchInitException("Connect to solr server error use server '" + server + "'");
            }
        }
        return cachedSolrServer;
    }

    /**
     * Indexes the given beans and commits.
     *
     * <p>{@code null} entries are skipped. A {@code null} list, an empty list, or a list that
     * contains only {@code null} entries results in no request being sent.
     *
     * @param searchBeans beans to index
     * @throws SearchInitException if a bean is missing one of the mandatory fields
     * @throws Exception           if the Solr request fails
     */
    public synchronized void doIndex(List<SearchBean> searchBeans) throws Exception {
        if (searchBeans == null || searchBeans.isEmpty()) {
            logger.debug("no search beans given, nothing to index!");
            return;
        }

        List<SolrInputDocument> documents = new ArrayList<SolrInputDocument>(searchBeans.size());
        for (SearchBean sb : searchBeans) {
            if (sb == null) {
                // Skip the bad entry instead of abandoning the documents already built.
                logger.debug("give SearchBean is null!");
                continue;
            }
            documents.add(toSolrDocument(sb));
        }
        if (documents.isEmpty()) {
            return;
        }

        SolrServer solrServer = getSolrServer();
        solrServer.add(documents);
        solrServer.commit();
    }

    /**
     * Converts one bean into a Solr input document, validating the mandatory fields.
     *
     * @param sb bean to convert; must not be {@code null}
     * @return the populated document
     * @throws SearchInitException if a mandatory field is empty
     */
    private SolrInputDocument toSolrDocument(SearchBean sb) {
        // Initialize some fields on the bean before reading them.
        sb.initPublicFields();

        if (StringUtils.isEmpty(sb.getId())) {
            throw new SearchInitException("you must give a id");
        }
        SolrInputDocument document = new SolrInputDocument();
        // Keeps every object unique, and lets the object's primary key locate its index entry.
        document.addField("id", "uniqueKey-" + sb.getIndexType() + "-" + sb.getId());
        document.addField("pkId", sb.getId());

        if (StringUtils.isEmpty(sb.getKeyword())) {
            throw new SearchInitException("you must give a keyword");
        }
        document.addField("keyword", sb.getKeyword());

        if (StringUtils.isEmpty(sb.getOwerId())) {
            throw new SearchInitException("you must give a owerId");
        }
        document.addField("owerId", sb.getOwerId());

        if (StringUtils.isEmpty(sb.getOwerName())) {
            throw new SearchInitException("you must give a owerName");
        }
        document.addField("owerName", sb.getOwerName());

        if (StringUtils.isEmpty(sb.getLink())) {
            throw new SearchInitException("you must give a link");
        }
        document.addField("link", sb.getLink());

        if (StringUtils.isEmpty(sb.getCreateDate())) {
            throw new SearchInitException("you must give a createDate");
        }
        document.addField("createDate", sb.getCreateDate());

        document.addField("indexType", getIndexType(sb));

        String[] doIndexFields = sb.getDoIndexFields();
        Map<String, String> values = sb.getIndexFieldValues();
        if (doIndexFields != null && doIndexFields.length > 0) {
            for (String field : doIndexFields) {
                // Written under the dynamic-field name.
                document.addField(field + DYNAMIC_FIELD_SUFFIX, values.get(field));
            }
        }
        return document;
    }

    /**
     * Deletes the documents whose {@code pkId} equals the bean's id, then commits.
     * Does nothing when the bean or its id is empty.
     *
     * @param bean bean whose index entry should be removed
     * @throws Exception if the Solr request fails
     */
    public synchronized void deleteIndex(SearchBean bean) throws Exception {
        if (bean == null) {
            logger.warn("Get search bean is empty!");
            return;
        }
        String id = bean.getId();
        if (StringUtils.isEmpty(id)) {
            logger.warn("get id and id value from bean is empty!");
            return;
        }

        // NOTE(review): this matches on pkId only, so beans of another index type that share
        // the same id would be removed as well - confirm whether that is intended.
        SolrServer solrServer = getSolrServer();
        UpdateResponse ur = solrServer.deleteByQuery("pkId:" + id);
        logger.debug("delete all indexs! UpdateResponse is '{}'! execute for '{}'ms!", ur, ur.getElapsedTime());
        solrServer.commit();
    }

    /**
     * Deletes the index entries of all given beans, one {@link #deleteIndex(SearchBean)} call
     * per bean. Does nothing when the list is {@code null}.
     *
     * @param beans beans whose index entries should be removed
     * @throws Exception if a Solr request fails
     */
    public synchronized void deleteIndexs(List<SearchBean> beans) throws Exception {
        if (beans == null) {
            logger.warn("Get beans is empty!");
            return;
        }
        for (SearchBean bean : beans) {
            this.deleteIndex(bean);
        }
    }

    /**
     * Searches the {@code *_message} fields named by each bean's {@code getDoSearchFields()}
     * for that bean's keyword, using a {@code *keyword*} wildcard match.
     *
     * @param beans         beans describing what to search for
     * @param isHighlighter whether highlighted snippets should replace the stored values
     * @param start         offset of the first row, or {@code -1} for {@code 0}
     * @param num           number of rows, or {@code -1} for a very large row count
     * @return the page of matching beans, or the empty pagination when there is nothing to query
     * @throws Exception if the Solr request fails
     */
    public PaginationSupport doSearch(List<SearchBean> beans, boolean isHighlighter, int start, int num) throws Exception {
        if (beans == null || beans.isEmpty()) {
            logger.debug("given search beans is empty!");
            return PaginationUtils.getNullPagination();
        }

        String queryString = buildSolrQueryString(beans);
        if (StringUtils.isEmpty(queryString)) {
            logger.warn("query string is null!");
            return PaginationUtils.getNullPagination();
        }

        SolrQuery query = new SolrQuery();
        query.setQuery(queryString);
        query.setStart(start == -1 ? 0 : start);
        query.setRows(num == -1 ? UNBOUNDED_ROWS : num);
        query.setFields("*", "score");
        if (isHighlighter) {
            query.setHighlight(true).setHighlightSimplePre(getHtmlPrefix()).setHighlightSimplePost(getHtmlSuffix());
            query.setHighlightSnippets(2);
            query.setHighlightFragsize(1000);
            query.setParam("hl.fl", "*");
        }

        QueryResponse response = getSolrServer().query(query);
        SolrDocumentList found = response.getResults();
        Map<String, Map<String, List<String>>> highlighting = isHighlighter ? response.getHighlighting() : null;

        // Left raw on purpose: the element type expected by PaginationUtils.makePagination
        // is not visible from this class.
        List queryResults = new ArrayList();
        for (SolrDocument doc : found) {
            String indexType = doc.get("indexType").toString();
            SearchBean result = super.getSearchBean(indexType, beans);
            if (result == null) {
                logger.warn("no search bean found for index type '{}'!", indexType);
                continue;
            }

            boolean keep = true;
            try {
                keep = populateFromDocument(result, doc, highlighting);
            } catch (Exception e) {
                // Best effort: a partially populated result is still returned.
                logger.error(e.getMessage(), e);
            }
            if (keep) {
                queryResults.add(result);
            }
        }

        return PaginationUtils.makePagination(queryResults, (int) found.getNumFound(), num, start);
    }

    /**
     * Builds the Solr query string: one {@code (<field>_message:*<keyword>*)} clause per
     * search field of every bean, all clauses joined with {@code OR}.
     *
     * @param beans beans to take fields and keywords from; {@code null} entries are skipped
     * @return the query string, empty when no bean declares a search field
     */
    private String buildSolrQueryString(List<SearchBean> beans) {
        StringBuilder query = new StringBuilder();
        for (SearchBean bean : beans) {
            if (bean == null) {
                continue;
            }
            // Fields to search in.
            String[] doSearchFields = bean.getDoSearchFields();
            if (doSearchFields == null || doSearchFields.length == 0) {
                continue;
            }
            for (String field : doSearchFields) {
                if (query.length() > 0) {
                    // Also separates clauses that come from different beans.
                    query.append(" OR ");
                }
                // NOTE(review): the keyword is inserted unescaped, so Solr query syntax
                // characters in it change the query - escape it if it can be user input.
                query.append("(").append(field).append("_message:*").append(bean.getKeyword()).append("*").append(")");
            }
        }
        return query.toString();
    }

    /**
     * Copies the stored (or highlighted) values of a Solr document into a result bean.
     *
     * @param result       bean to populate
     * @param doc          document returned by Solr
     * @param highlighting highlighting section of the response, or {@code null} when
     *                     highlighting is off
     * @return {@code false} when the bean declares no search fields and should be left out of
     *         the results, {@code true} otherwise
     * @throws Exception if a stored mandatory field is missing or a setter fails
     */
    private boolean populateFromDocument(SearchBean result, SolrDocument doc,
                                         Map<String, Map<String, List<String>>> highlighting) throws Exception {
        result.setId(doc.getFieldValue("pkId").toString());
        result.setLink(doc.getFieldValue("link").toString());
        result.setOwerId(doc.getFieldValue("owerId").toString());
        result.setOwerName(doc.getFieldValue("owerName").toString());
        result.setCreateDate(doc.getFieldValue("createDate").toString());
        result.setIndexType(doc.getFieldValue("indexType").toString());

        String docId = highlighting == null ? null : (String) doc.getFieldValue("id");

        String keyword = StringUtils.EMPTY;
        String highlightedKeyword = firstHighlightSnippet(highlighting, docId, "keyword");
        if (highlightedKeyword != null) {
            keyword = highlightedKeyword;
        }
        if (StringUtils.isEmpty(keyword)) {
            keyword = doc.getFieldValue("keyword").toString();
        }
        result.setKeyword(keyword);

        // Fields to search in.
        String[] doSearchFields = result.getDoSearchFields();
        if (doSearchFields == null || doSearchFields.length == 0) {
            return false;
        }

        Map<String, String> extendValues = new HashMap<String, String>();
        for (String field : doSearchFields) {
            String solrField = field + DYNAMIC_FIELD_SUFFIX;
            Object stored = doc.getFieldValue(solrField);
            // A document without this field yields an empty value rather than failing the bean.
            String value = stored == null ? StringUtils.EMPTY : stored.toString();
            String highlighted = firstHighlightSnippet(highlighting, docId, solrField);
            if (highlighted != null) {
                value = highlighted;
            }
            extendValues.put(field, value);
        }
        result.setSearchValues(extendValues);
        return true;
    }

    /**
     * Looks up the first highlight snippet for one field of one document.
     *
     * @param highlighting highlighting section of the response, may be {@code null}
     * @param docId        value of the document's {@code id} field
     * @param field        Solr field name
     * @return the first snippet, or {@code null} when there is none
     */
    private String firstHighlightSnippet(Map<String, Map<String, List<String>>> highlighting,
                                         String docId, String field) {
        if (highlighting == null) {
            return null;
        }
        Map<String, List<String>> perDocument = highlighting.get(docId);
        if (perDocument == null) {
            return null;
        }
        List<String> snippets = perDocument.get(field);
        if (snippets == null || snippets.isEmpty()) {
            return null;
        }
        return snippets.get(0);
    }

    /**
     * Deletes every document whose index type is the one derived from the given bean class.
     *
     * @param clazz bean class; instantiated to derive its index type
     * @throws Exception if instantiation or the Solr request fails
     */
    public synchronized void deleteIndexsByIndexType(Class<? extends SearchBean> clazz) throws Exception {
        String indexType = getIndexType(BeanUtils.instantiate(clazz));
        this.deleteIndexsByIndexType(indexType);
    }

    /**
     * Deletes every document whose {@code indexType} field equals the given value, then commits.
     *
     * @param indexType index type to remove
     * @throws Exception if the Solr request fails
     */
    public synchronized void deleteIndexsByIndexType(String indexType) throws Exception {
        SolrServer solrServer = getSolrServer();
        UpdateResponse ur = solrServer.deleteByQuery("indexType:" + indexType);
        logger.debug("delete all indexs! UpdateResponse is '{}'! execute for '{}'ms!", ur, ur.getElapsedTime());
        solrServer.commit();
    }

    /**
     * Deletes every document in the index ({@code *:*}), then commits.
     *
     * @throws Exception if the Solr request fails
     */
    public synchronized void deleteAllIndexs() throws Exception {
        SolrServer solrServer = getSolrServer();
        UpdateResponse ur = solrServer.deleteByQuery("*:*");
        logger.debug("delete all indexs! UpdateResponse is '{}'! execute for '{}'ms!", ur, ur.getElapsedTime());
        solrServer.commit();
    }

    /**
     * Updates the index entry of a single bean; see {@link #updateIndexs(java.util.List)}.
     *
     * @param searchBean bean to update
     * @throws Exception if the Solr request fails
     */
    public void updateIndex(SearchBean searchBean) throws Exception {
        this.updateIndexs(Collections.singletonList(searchBean));
    }

    /**
     * Updates index entries.<br/>
     * In Solr, updating an index entry is the same as creating it (when the same id already
     * exists it is updated, otherwise it is created).<br/>
     * {@link SolrSearchEngine#doIndex(java.util.List)}
     *
     * @param searchBeans beans to update
     * @throws Exception if the Solr request fails
     */
    public void updateIndexs(List<SearchBean> searchBeans) throws Exception {
        this.doIndex(searchBeans);
    }

    /**
     * Sets the base URL of the Solr server and discards any client created for the old URL.
     *
     * @param server Solr base URL
     */
    public synchronized void setServer(String server) {
        this.server = server;
        this.cachedSolrServer = null;
    }
}



关于solr服务端的配置,我想说的就是那个schema.xml文件的配置:
1.这里我配置了几个共有的字段,如下:

<!-- start my solr -->   <field name="pkId" type="string" indexed="true" stored="true"/>   <field name="keyword" type="string" indexed="true" stored="true"/>   <field name="owerId" type="string" indexed="true" stored="true"/>   <field name="owerName" type="string" indexed="true" stored="true"/>   <field name="link" type="string" indexed="true" stored="true"/>   <field name="createDate" type="string" indexed="true" stored="true"/>   <field name="indexType" type="string" indexed="true" stored="true"/><!-- end my solr -->
这些是一些固定字段,也是每个对象都通用的.
<!-- a dynamic field, matches all fields that end with _message --><dynamicField name="*_message" type="paodingAnalyzer" indexed="true" stored="true"/>
这个是动态匹配字段,比如说我有一个对象,其中一个字段是真实姓名(truename),那么在solr索引中的字段名称就叫(truename_message).这样就能匹配起来了,so easy!


再谈谈solr使用分词,暂时我使用的是庖丁分词(paoding),需要的可以去网上找找,osc上就有的.
需要在solr的schema.xml添加一个字段类型:

<!-- paoding --><fieldType name="paodingAnalyzer" class="solr.TextField">      <analyzer class="net.paoding.analysis.analyzer.PaodingAnalyzer"></analyzer>  </fieldType>

然后在你需要使用分词的字段的配置上,修改type="paodingAnalyzer".跟上面的动态字段一致.
可以检查一下是否配置正确:
访问http://192.168.1.118/solr/admin/analysis.jsp?highlight=on
按照以下图片说明操作:


好了,over here.具体对索引的新增.删除.更新.以及查询的操作见上面的代码,相信对于沉浸在oschina这么多年的你们,这些都是小case了.


广告 广告

评论区