|
|
马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?立即注册
x
引言
在当今信息爆炸的时代,搜索功能已经成为几乎所有应用程序的核心组成部分。无论是电子商务网站、企业内部系统还是社交媒体平台,高效、准确的搜索功能都是提升用户体验的关键因素。本文将深入探讨在Java项目中实现高效搜索功能的核心技术与方法,从基础实现到高级优化,帮助开发者构建满足各种需求的搜索解决方案。
搜索功能的重要性与应用场景
搜索功能在各类应用中扮演着至关重要的角色:
1. 电子商务平台:帮助用户快速找到所需商品,提高转化率
2. 内容管理系统:便于内容检索和管理
3. 企业内部系统:加速信息查找,提高工作效率
4. 社交媒体:实现用户、话题和内容的快速定位
5. 文档管理系统:支持海量文档的快速检索
一个高效的搜索系统不仅能提供快速响应,还应具备相关性排序、模糊匹配、高亮显示等高级功能,以满足用户多样化的搜索需求。
Java中实现搜索的核心技术
基本数据结构搜索
对于小型应用或简单需求,可以使用Java内置的数据结构实现基本搜索功能。
- // 使用List进行线性搜索
- public List<Product> searchByName(List<Product> products, String keyword) {
- List<Product> result = new ArrayList<>();
- for (Product product : products) {
- if (product.getName().toLowerCase().contains(keyword.toLowerCase())) {
- result.add(product);
- }
- }
- return result;
- }
- // 使用Set进行快速查找
- public Set<Product> searchInSet(Set<Product> productSet, String keyword) {
- Set<Product> result = new HashSet<>();
- for (Product product : productSet) {
- if (product.getName().toLowerCase().contains(keyword.toLowerCase())) {
- result.add(product);
- }
- }
- return result;
- }
复制代码- // 使用Map实现基于ID的快速查找
- public Map<String, Product> productMap = new HashMap<>();
- public Product searchById(String id) {
- return productMap.get(id);
- }
- // 使用Map进行多条件搜索
- public List<Product> searchByMultipleCriteria(Map<String, String> criteria) {
- List<Product> result = new ArrayList<>();
- for (Product product : productMap.values()) {
- boolean match = true;
- for (Map.Entry<String, String> entry : criteria.entrySet()) {
- String key = entry.getKey();
- String value = entry.getValue().toLowerCase();
-
- if ("name".equals(key) && !product.getName().toLowerCase().contains(value)) {
- match = false;
- break;
- } else if ("category".equals(key) && !product.getCategory().toLowerCase().equals(value)) {
- match = false;
- break;
- }
- // 可以添加更多条件
- }
- if (match) {
- result.add(product);
- }
- }
- return result;
- }
复制代码
数据库搜索
对于大多数企业应用,数据存储在关系数据库中,利用SQL查询是实现搜索的常见方式。
- // 使用JDBC实现基本搜索
- public List<Product> searchInDatabase(String keyword) {
- List<Product> result = new ArrayList<>();
- String sql = "SELECT * FROM products WHERE name LIKE ? OR description LIKE ?";
-
- try (Connection conn = dataSource.getConnection();
- PreparedStatement stmt = conn.prepareStatement(sql)) {
-
- String searchPattern = "%" + keyword + "%";
- stmt.setString(1, searchPattern);
- stmt.setString(2, searchPattern);
-
- ResultSet rs = stmt.executeQuery();
- while (rs.next()) {
- Product product = new Product();
- product.setId(rs.getString("id"));
- product.setName(rs.getString("name"));
- product.setDescription(rs.getString("description"));
- // 设置其他属性
- result.add(product);
- }
- } catch (SQLException e) {
- e.printStackTrace();
- }
- return result;
- }
复制代码- // 使用JPA Repository方法
- public interface ProductRepository extends JpaRepository<Product, String> {
-
- // 基本名称搜索
- List<Product> findByNameContainingIgnoreCase(String keyword);
-
- // 多条件搜索
- List<Product> findByNameContainingIgnoreCaseAndCategory(String name, String category);
-
- // 自定义查询
- @Query("SELECT p FROM Product p WHERE LOWER(p.name) LIKE LOWER(CONCAT('%', ?1, '%')) OR LOWER(p.description) LIKE LOWER(CONCAT('%', ?1, '%'))")
- List<Product> searchByKeyword(String keyword);
- }
- // 服务层使用
- @Service
- public class ProductService {
-
- @Autowired
- private ProductRepository productRepository;
-
- public List<Product> searchProducts(String keyword) {
- return productRepository.searchByKeyword(keyword);
- }
-
- public List<Product> advancedSearch(String name, String category, Double minPrice, Double maxPrice) {
- Specification<Product> spec = Specification.where(null);
-
- if (name != null && !name.isEmpty()) {
- spec = spec.and((root, query, cb) ->
- cb.like(cb.lower(root.get("name")), "%" + name.toLowerCase() + "%"));
- }
-
- if (category != null && !category.isEmpty()) {
- spec = spec.and((root, query, cb) ->
- cb.equal(root.get("category"), category));
- }
-
- if (minPrice != null) {
- spec = spec.and((root, query, cb) ->
- cb.ge(root.get("price"), minPrice));
- }
-
- if (maxPrice != null) {
- spec = spec.and((root, query, cb) ->
- cb.le(root.get("price"), maxPrice));
- }
-
- return productRepository.findAll(spec);
- }
- }
复制代码
全文搜索引擎集成
对于大规模数据和高性能搜索需求,集成专门的全文搜索引擎是最佳选择。常用的方案包括Lucene、Elasticsearch和Hibernate Search,具体的集成方式与代码示例见后文“常用搜索框架和库的介绍与使用”一节。
搜索性能优化技术
索引技术
索引是提高搜索性能的关键技术,它通过预先构建数据结构来加速查询过程。
- // 在实体类中定义索引
- @Entity
- @Table(name = "products",
- indexes = {
- @Index(name = "idx_product_name", columnList = "name"),
- @Index(name = "idx_product_category", columnList = "category"),
- @Index(name = "idx_product_price", columnList = "price")
- })
- public class Product {
- @Id
- private String id;
-
- @Column(name = "name", nullable = false)
- private String name;
-
- @Column(name = "category")
- private String category;
-
- @Column(name = "price")
- private Double price;
-
- // 其他字段、getter和setter
- }
复制代码- // 创建复合索引以支持多字段查询
- @Entity
- @Table(name = "products",
- indexes = {
- @Index(name = "idx_product_name_category", columnList = "name, category"),
- @Index(name = "idx_product_category_price", columnList = "category, price")
- })
- public class Product {
- // 实体定义
- }
复制代码
缓存策略
缓存可以显著减少数据库访问次数,提高搜索响应速度。
- @Service
- public class ProductService {
-
- @Autowired
- private ProductRepository productRepository;
-
- @Cacheable(value = "productSearch", key = "#keyword")
- public List<Product> searchProducts(String keyword) {
- return productRepository.searchByKeyword(keyword);
- }
-
- @CacheEvict(value = "productSearch", allEntries = true)
- public void clearSearchCache() {
- // 清除所有搜索缓存
- }
-
- @CacheEvict(value = "productSearch", key = "#product.id")
- public void updateProduct(Product product) {
- productRepository.save(product);
- }
- }
复制代码- @Service
- public class ProductSearchService {
-
- @Autowired
- private ProductRepository productRepository;
-
- @Autowired
- private RedisTemplate<String, Object> redisTemplate;
-
- private static final String SEARCH_CACHE_PREFIX = "search:";
-
- public List<Product> searchWithCache(String keyword) {
- String cacheKey = SEARCH_CACHE_PREFIX + keyword;
-
- // 尝试从缓存获取
- List<Product> cachedResult = (List<Product>) redisTemplate.opsForValue().get(cacheKey);
- if (cachedResult != null) {
- return cachedResult;
- }
-
- // 缓存未命中,查询数据库
- List<Product> result = productRepository.searchByKeyword(keyword);
-
- // 将结果存入缓存,设置过期时间
- redisTemplate.opsForValue().set(cacheKey, result, 1, TimeUnit.HOURS);
-
- return result;
- }
-
- public void invalidateSearchCache(String keyword) {
- String cacheKey = SEARCH_CACHE_PREFIX + keyword;
- redisTemplate.delete(cacheKey);
- }
- }
复制代码
查询优化
优化查询语句和执行计划可以显著提高搜索性能。
- // 使用EXPLAIN分析查询性能
- public void analyzeQueryPerformance(String keyword) {
- String sql = "EXPLAIN SELECT * FROM products WHERE name LIKE ?";
-
- try (Connection conn = dataSource.getConnection();
- PreparedStatement stmt = conn.prepareStatement(sql)) {
-
- stmt.setString(1, "%" + keyword + "%");
- ResultSet rs = stmt.executeQuery();
-
- while (rs.next()) {
- // 分析查询执行计划
- System.out.println(rs.getString(1));
- }
- } catch (SQLException e) {
- e.printStackTrace();
- }
- }
- // 优化后的查询,避免使用前导通配符
- public List<Product> optimizedSearch(String keyword) {
- // 避免使用前导通配符(%),这会导致索引失效
- if (keyword.startsWith("%")) {
- keyword = keyword.substring(1);
- }
-
- return productRepository.searchByKeyword(keyword);
- }
复制代码- // 使用JPA实现高效分页
- public Page<Product> searchWithPagination(String keyword, int page, int size) {
- PageRequest pageRequest = PageRequest.of(page, size, Sort.by("name").ascending());
- return productRepository.findByNameContainingIgnoreCase(keyword, pageRequest);
- }
- // 使用游标分页(Cursor-based Pagination)替代传统分页
- public List<Product> searchWithCursor(String keyword, String lastId, int limit) {
- if (lastId == null || lastId.isEmpty()) {
- // 第一页查询
- return productRepository.findFirst10ByNameContainingIgnoreCaseOrderByIdAsc(keyword);
- } else {
- // 后续页面查询,使用ID作为游标
- return productRepository.findFirst10ByIdAfterAndNameContainingIgnoreCaseOrderByIdAsc(lastId, keyword);
- }
- }
复制代码
常用搜索框架和库的介绍与使用
Lucene
Apache Lucene是一个高性能、全功能的文本搜索引擎库,它提供了强大的索引和搜索功能。
- public class LuceneSearchService {
-
- private final Directory indexDirectory;
- private final Analyzer analyzer;
-
- public LuceneSearchService(String indexDirPath) throws IOException {
- this.indexDirectory = FSDirectory.open(Paths.get(indexDirPath));
- this.analyzer = new StandardAnalyzer();
- }
-
- // 创建索引
- public void indexProducts(List<Product> products) throws IOException {
- IndexWriterConfig config = new IndexWriterConfig(analyzer);
- IndexWriter writer = new IndexWriter(indexDirectory, config);
-
- // 清除现有索引
- writer.deleteAll();
-
- for (Product product : products) {
- Document doc = new Document();
- doc.add(new StringField("id", product.getId(), Field.Store.YES));
- doc.add(new TextField("name", product.getName(), Field.Store.YES));
- doc.add(new TextField("description", product.getDescription(), Field.Store.YES));
- doc.add(new StringField("category", product.getCategory(), Field.Store.YES));
- doc.add(new DoublePoint("price", product.getPrice()));
-
- writer.addDocument(doc);
- }
-
- writer.close();
- }
-
- // 搜索产品
- public List<Product> searchProducts(String queryStr, int maxResults) throws IOException, ParseException {
- QueryParser parser = new QueryParser("name", analyzer);
- Query query = parser.parse(queryStr);
-
- IndexReader reader = DirectoryReader.open(indexDirectory);
- IndexSearcher searcher = new IndexSearcher(reader);
-
- TopDocs topDocs = searcher.search(query, maxResults);
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
-
- List<Product> results = new ArrayList<>();
- for (ScoreDoc scoreDoc : scoreDocs) {
- Document doc = searcher.doc(scoreDoc.doc);
-
- Product product = new Product();
- product.setId(doc.get("id"));
- product.setName(doc.get("name"));
- product.setDescription(doc.get("description"));
- product.setCategory(doc.get("category"));
- product.setPrice(Double.parseDouble(doc.get("price")));
-
- results.add(product);
- }
-
- reader.close();
- return results;
- }
-
- // 高级搜索 - 多字段搜索
- public List<Product> advancedSearch(String queryStr, int maxResults) throws IOException, ParseException {
- // 创建多字段查询
- String[] fields = {"name", "description", "category"};
- BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
-
- QueryParser parser = new QueryParser("", analyzer);
- Query query = parser.parse(queryStr);
-
- for (String field : fields) {
- booleanQuery.add(new BooleanClause(new TermQuery(new Term(field, queryStr)), BooleanClause.Occur.SHOULD));
- }
-
- IndexReader reader = DirectoryReader.open(indexDirectory);
- IndexSearcher searcher = new IndexSearcher(reader);
-
- TopDocs topDocs = searcher.search(booleanQuery.build(), maxResults);
- ScoreDoc[] scoreDocs = topDocs.scoreDocs;
-
- List<Product> results = new ArrayList<>();
- for (ScoreDoc scoreDoc : scoreDocs) {
- Document doc = searcher.doc(scoreDoc.doc);
-
- Product product = new Product();
- product.setId(doc.get("id"));
- product.setName(doc.get("name"));
- product.setDescription(doc.get("description"));
- product.setCategory(doc.get("category"));
- product.setPrice(Double.parseDouble(doc.get("price")));
-
- results.add(product);
- }
-
- reader.close();
- return results;
- }
- }
复制代码
Elasticsearch
Elasticsearch是一个基于Lucene的分布式、RESTful搜索和分析引擎,适用于大规模数据搜索。
- @Service
- public class ElasticsearchService {
-
- private final RestHighLevelClient client;
-
- public ElasticsearchService() {
- client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("localhost", 9200, "http")));
- }
-
- // 创建索引
- public void createIndex(String indexName) throws IOException {
- CreateIndexRequest request = new CreateIndexRequest(indexName);
- client.indices().create(request, RequestOptions.DEFAULT);
- }
-
- // 索引产品数据
- public void indexProduct(String indexName, Product product) throws IOException {
- IndexRequest request = new IndexRequest(indexName);
- request.id(product.getId());
-
- // 将产品对象转换为JSON字符串
- ObjectMapper mapper = new ObjectMapper();
- String json = mapper.writeValueAsString(product);
- request.source(json, XContentType.JSON);
-
- client.index(request, RequestOptions.DEFAULT);
- }
-
- // 批量索引产品
- public void bulkIndexProducts(String indexName, List<Product> products) throws IOException {
- BulkRequest request = new BulkRequest();
- ObjectMapper mapper = new ObjectMapper();
-
- for (Product product : products) {
- IndexRequest indexRequest = new IndexRequest(indexName)
- .id(product.getId())
- .source(mapper.writeValueAsString(product), XContentType.JSON);
- request.add(indexRequest);
- }
-
- client.bulk(request, RequestOptions.DEFAULT);
- }
-
- // 基本搜索
- public List<Product> searchProducts(String indexName, String fieldName, String value, int size) throws IOException {
- SearchRequest request = new SearchRequest(indexName);
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
- // 匹配查询
- sourceBuilder.query(QueryBuilders.matchQuery(fieldName, value));
- sourceBuilder.size(size);
-
- request.source(sourceBuilder);
-
- SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- SearchHit[] hits = response.getHits().getHits();
-
- List<Product> results = new ArrayList<>();
- ObjectMapper mapper = new ObjectMapper();
-
- for (SearchHit hit : hits) {
- Product product = mapper.readValue(hit.getSourceAsString(), Product.class);
- results.add(product);
- }
-
- return results;
- }
-
- // 多条件搜索
- public List<Product> multiFieldSearch(String indexName, String keyword, String category, Double minPrice, Double maxPrice, int size) throws IOException {
- SearchRequest request = new SearchRequest(indexName);
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
- BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
- // 关键词搜索
- if (keyword != null && !keyword.isEmpty()) {
- boolQuery.should(QueryBuilders.matchQuery("name", keyword));
- boolQuery.should(QueryBuilders.matchQuery("description", keyword));
- }
-
- // 分类过滤
- if (category != null && !category.isEmpty()) {
- boolQuery.filter(QueryBuilders.termQuery("category", category));
- }
-
- // 价格范围过滤
- if (minPrice != null || maxPrice != null) {
- RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("price");
- if (minPrice != null) {
- rangeQuery.gte(minPrice);
- }
- if (maxPrice != null) {
- rangeQuery.lte(maxPrice);
- }
- boolQuery.filter(rangeQuery);
- }
-
- sourceBuilder.query(boolQuery);
- sourceBuilder.size(size);
-
- request.source(sourceBuilder);
-
- SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- SearchHit[] hits = response.getHits().getHits();
-
- List<Product> results = new ArrayList<>();
- ObjectMapper mapper = new ObjectMapper();
-
- for (SearchHit hit : hits) {
- Product product = mapper.readValue(hit.getSourceAsString(), Product.class);
- results.add(product);
- }
-
- return results;
- }
-
- // 聚合搜索
- public Map<String, Object> searchWithAggregations(String indexName, String keyword) throws IOException {
- SearchRequest request = new SearchRequest(indexName);
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
- // 基本查询
- if (keyword != null && !keyword.isEmpty()) {
- sourceBuilder.query(QueryBuilders.multiMatchQuery(keyword, "name", "description"));
- }
-
- // 添加分类聚合
- TermsAggregationBuilder categoryAggregation = AggregationBuilders.terms("categories")
- .field("category")
- .size(10);
-
- // 添加价格范围聚合
- RangeAggregationBuilder priceRangeAggregation = AggregationBuilders.range("price_ranges")
- .field("price")
- .addRange(0, 50)
- .addRange(50, 100)
- .addRange(100, 200)
- .addRange(200, Double.MAX_VALUE);
-
- sourceBuilder.aggregation(categoryAggregation);
- sourceBuilder.aggregation(priceRangeAggregation);
-
- request.source(sourceBuilder);
-
- SearchResponse response = client.search(request, RequestOptions.DEFAULT);
-
- // 处理聚合结果
- Map<String, Object> result = new HashMap<>();
-
- // 获取分类聚合结果
- Terms categoriesAgg = response.getAggregations().get("categories");
- List<Map<String, Object>> categories = new ArrayList<>();
- for (Terms.Bucket bucket : categoriesAgg.getBuckets()) {
- Map<String, Object> categoryInfo = new HashMap<>();
- categoryInfo.put("name", bucket.getKeyAsString());
- categoryInfo.put("count", bucket.getDocCount());
- categories.add(categoryInfo);
- }
- result.put("categories", categories);
-
- // 获取价格范围聚合结果
- Range priceRangesAgg = response.getAggregations().get("price_ranges");
- List<Map<String, Object>> priceRanges = new ArrayList<>();
- for (Range.Bucket bucket : priceRangesAgg.getBuckets()) {
- Map<String, Object> rangeInfo = new HashMap<>();
- rangeInfo.put("from", bucket.getFrom());
- rangeInfo.put("to", bucket.getTo());
- rangeInfo.put("count", bucket.getDocCount());
- priceRanges.add(rangeInfo);
- }
- result.put("priceRanges", priceRanges);
-
- return result;
- }
-
- // 关闭客户端
- public void close() throws IOException {
- client.close();
- }
- }
复制代码
Hibernate Search
Hibernate Search将Elasticsearch或Lucene与Hibernate ORM集成,提供透明的全文搜索功能。
- // 实体类配置
- @Entity
- @Table(name = "products")
- @Indexed(index = "products") // 声明此实体需要被索引
- public class Product {
-
- @Id
- @GeneratedValue(generator = "UUID")
- @GenericGenerator(name = "UUID", strategy = "org.hibernate.id.UUIDGenerator")
- @Column(name = "id", updatable = false, nullable = false)
- @DocumentId // 声明为文档ID
- private String id;
-
- @Column(name = "name", nullable = false)
- @Field(name = "name", analyzer = @Analyzer(definition = "standard")) // 声明为可搜索字段
- private String name;
-
- @Column(name = "description")
- @Field(name = "description", analyzer = @Analyzer(definition = "standard"))
- private String description;
-
- @Column(name = "category")
- @Field(name = "category", analyzer = @Analyzer(definition = "keyword"))
- private String category;
-
- @Column(name = "price")
- @Field(name = "price")
- @NumericField // 数值字段,支持范围查询
- private Double price;
-
- // 其他字段、getter和setter
- }
复制代码- @Service
- public class HibernateSearchService {
-
- @PersistenceContext
- private EntityManager entityManager;
-
- // 初始化索引
- public void initializeIndex() {
- FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
- try {
- fullTextEntityManager.createIndexer().startAndWait();
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- // 基本搜索
- public List<Product> search(String keyword) {
- FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
-
- QueryBuilder qb = fullTextEntityManager.getSearchFactory()
- .buildQueryBuilder()
- .forEntity(Product.class)
- .get();
-
- // 在name和description字段中搜索关键词
- org.apache.lucene.search.Query luceneQuery = qb
- .keyword()
- .onFields("name", "description")
- .matching(keyword)
- .createQuery();
-
- // 包装Hibernate查询
- FullTextQuery jpaQuery = fullTextEntityManager.createFullTextQuery(luceneQuery, Product.class);
-
- // 执行搜索
- return jpaQuery.getResultList();
- }
-
- // 高级搜索 - 多条件组合
- public List<Product> advancedSearch(String keyword, String category, Double minPrice, Double maxPrice) {
- FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
-
- QueryBuilder qb = fullTextEntityManager.getSearchFactory()
- .buildQueryBuilder()
- .forEntity(Product.class)
- .get();
-
- BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
-
- // 添加关键词搜索条件
- if (keyword != null && !keyword.isEmpty()) {
- org.apache.lucene.search.Query keywordQuery = qb
- .keyword()
- .onFields("name", "description")
- .matching(keyword)
- .createQuery();
- booleanQuery.add(keywordQuery, BooleanClause.Occur.SHOULD);
- }
-
- // 添加分类过滤条件
- if (category != null && !category.isEmpty()) {
- org.apache.lucene.search.Query categoryQuery = qb
- .keyword()
- .onField("category")
- .matching(category)
- .createQuery();
- booleanQuery.add(categoryQuery, BooleanClause.Occur.MUST);
- }
-
- // 添加价格范围过滤条件
- if (minPrice != null || maxPrice != null) {
- RangeQuery<Double> priceRangeQuery = qb
- .range()
- .onField("price")
- .from(minPrice != null ? minPrice : 0.0)
- .to(maxPrice != null ? maxPrice : Double.MAX_VALUE)
- .createQuery();
- booleanQuery.add(priceRangeQuery, BooleanClause.Occur.MUST);
- }
-
- // 如果没有添加任何条件,使用MatchAllDocsQuery
- if (booleanQuery.build().clauses().isEmpty()) {
- return fullTextEntityManager.createFullTextQuery(new MatchAllDocsQuery(), Product.class).getResultList();
- }
-
- // 包装Hibernate查询
- FullTextQuery jpaQuery = fullTextEntityManager.createFullTextQuery(booleanQuery.build(), Product.class);
-
- // 执行搜索
- return jpaQuery.getResultList();
- }
-
- // 分页搜索
- public List<Product> searchWithPagination(String keyword, int page, int size) {
- FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
-
- QueryBuilder qb = fullTextEntityManager.getSearchFactory()
- .buildQueryBuilder()
- .forEntity(Product.class)
- .get();
-
- org.apache.lucene.search.Query luceneQuery = qb
- .keyword()
- .onFields("name", "description")
- .matching(keyword)
- .createQuery();
-
- FullTextQuery jpaQuery = fullTextEntityManager.createFullTextQuery(luceneQuery, Product.class);
-
- // 设置分页
- jpaQuery.setFirstResult(page * size);
- jpaQuery.setMaxResults(size);
-
- // 执行搜索
- return jpaQuery.getResultList();
- }
-
- // 排序搜索
- public List<Product> searchWithSorting(String keyword, String sortField, boolean ascending) {
- FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
-
- QueryBuilder qb = fullTextEntityManager.getSearchFactory()
- .buildQueryBuilder()
- .forEntity(Product.class)
- .get();
-
- org.apache.lucene.search.Query luceneQuery = qb
- .keyword()
- .onFields("name", "description")
- .matching(keyword)
- .createQuery();
-
- FullTextQuery jpaQuery = fullTextEntityManager.createFullTextQuery(luceneQuery, Product.class);
-
- // 设置排序
- Sort sort = new Sort(new SortField(sortField, SortField.Type.STRING, !ascending));
- jpaQuery.setSort(sort);
-
- // 执行搜索
- return jpaQuery.getResultList();
- }
-
- // 模糊搜索
- public List<Product> fuzzySearch(String keyword, float similarity) {
- FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
-
- QueryBuilder qb = fullTextEntityManager.getSearchFactory()
- .buildQueryBuilder()
- .forEntity(Product.class)
- .get();
-
- org.apache.lucene.search.Query luceneQuery = qb
- .keyword()
- .fuzzy()
- .withEditDistanceUpTo(2) // 最大编辑距离
- .withPrefixLength(1) // 前缀长度
- .onFields("name", "description")
- .matching(keyword)
- .createQuery();
-
- FullTextQuery jpaQuery = fullTextEntityManager.createFullTextQuery(luceneQuery, Product.class);
-
- // 执行搜索
- return jpaQuery.getResultList();
- }
- }
复制代码
实际案例和代码示例
电子商务网站产品搜索实现
下面是一个完整的电子商务网站产品搜索实现示例,结合了多种技术。
- public interface ProductSearchService {
-
- /**
- * 基本关键词搜索
- * @param keyword 搜索关键词
- * @param page 页码
- * @param size 每页大小
- * @return 分页搜索结果
- */
- SearchResult<Product> searchByKeyword(String keyword, int page, int size);
-
- /**
- * 高级搜索
- * @param criteria 搜索条件
- * @param page 页码
- * @param size 每页大小
- * @return 分页搜索结果
- */
- SearchResult<Product> advancedSearch(SearchCriteria criteria, int page, int size);
-
- /**
- * 获取搜索建议
- * @param prefix 用户输入的前缀
- * @param size 返回建议数量
- * @return 搜索建议列表
- */
- List<String> getSuggestions(String prefix, int size);
-
- /**
- * 获取搜索聚合结果
- * @param criteria 搜索条件
- * @return 聚合结果
- */
- SearchAggregations getAggregations(SearchCriteria criteria);
- }
/**
 * Mutable holder for the conditions of an advanced product search. Every
 * field is optional; a null field means "no constraint".
 */
public class SearchCriteria {
    private String keyword;
    private String category;
    private Double minPrice;
    private Double maxPrice;
    private List<String> brands;
    private List<String> attributes;
    private SortOption sortOption;

    // Accessors are written out because the search services call them.

    public String getKeyword() { return keyword; }
    public void setKeyword(String keyword) { this.keyword = keyword; }

    public String getCategory() { return category; }
    public void setCategory(String category) { this.category = category; }

    public Double getMinPrice() { return minPrice; }
    public void setMinPrice(Double minPrice) { this.minPrice = minPrice; }

    public Double getMaxPrice() { return maxPrice; }
    public void setMaxPrice(Double maxPrice) { this.maxPrice = maxPrice; }

    public List<String> getBrands() { return brands; }
    public void setBrands(List<String> brands) { this.brands = brands; }

    public List<String> getAttributes() { return attributes; }
    public void setAttributes(List<String> attributes) { this.attributes = attributes; }

    public SortOption getSortOption() { return sortOption; }
    public void setSortOption(SortOption sortOption) { this.sortOption = sortOption; }

    /** Available result orderings, each with its external string value. */
    public enum SortOption {
        RELEVANCE("relevance"),
        PRICE_ASC("price_asc"),
        PRICE_DESC("price_desc"),
        NAME_ASC("name_asc"),
        NAME_DESC("name_desc"),
        NEWEST("newest"),
        RATING("rating");

        private final String value;

        SortOption(String value) {
            this.value = value;
        }

        /** Returns the external string value of this option. */
        public String getValue() {
            return value;
        }
    }
}
/**
 * One page of search results together with paging metadata.
 *
 * @param <T> type of the result items
 */
public class SearchResult<T> {
    private List<T> content;
    private int totalPages;
    private long totalElements;
    private int pageNumber;
    private int pageSize;

    // Accessors are written out because the search services call them.

    /** Items on this page. */
    public List<T> getContent() { return content; }
    public void setContent(List<T> content) { this.content = content; }

    /** Total number of pages for the query. */
    public int getTotalPages() { return totalPages; }
    public void setTotalPages(int totalPages) { this.totalPages = totalPages; }

    /** Total number of matching items across all pages. */
    public long getTotalElements() { return totalElements; }
    public void setTotalElements(long totalElements) { this.totalElements = totalElements; }

    /** Index of this page. */
    public int getPageNumber() { return pageNumber; }
    public void setPageNumber(int pageNumber) { this.pageNumber = pageNumber; }

    /** Requested page size. */
    public int getPageSize() { return pageSize; }
    public void setPageSize(int pageSize) { this.pageSize = pageSize; }
}
/** Facet counts (categories, price ranges, brands, attributes) that accompany a search. */
public class SearchAggregations {
    private List<CategoryCount> categories;
    private List<PriceRangeCount> priceRanges;
    private List<BrandCount> brands;
    private List<AttributeCount> attributes;

    // Accessors are written out because the search services call them.

    public List<CategoryCount> getCategories() { return categories; }
    public void setCategories(List<CategoryCount> categories) { this.categories = categories; }

    public List<PriceRangeCount> getPriceRanges() { return priceRanges; }
    public void setPriceRanges(List<PriceRangeCount> priceRanges) { this.priceRanges = priceRanges; }

    public List<BrandCount> getBrands() { return brands; }
    public void setBrands(List<BrandCount> brands) { this.brands = brands; }

    public List<AttributeCount> getAttributes() { return attributes; }
    public void setAttributes(List<AttributeCount> attributes) { this.attributes = attributes; }

    /** Number of matching products in one category. */
    public static class CategoryCount {
        private String name;
        private long count;

        public String getName() { return name; }
        public void setName(String name) { this.name = name; }

        public long getCount() { return count; }
        public void setCount(long count) { this.count = count; }
    }

    /** Number of matching products whose price lies between {@code from} and {@code to}. */
    public static class PriceRangeCount {
        private double from;
        private double to;
        private long count;

        public double getFrom() { return from; }
        public void setFrom(double from) { this.from = from; }

        public double getTo() { return to; }
        public void setTo(double to) { this.to = to; }

        public long getCount() { return count; }
        public void setCount(long count) { this.count = count; }
    }

    /** Number of matching products of one brand. */
    public static class BrandCount {
        private String name;
        private long count;

        public String getName() { return name; }
        public void setName(String name) { this.name = name; }

        public long getCount() { return count; }
        public void setCount(long count) { this.count = count; }
    }

    /** Number of matching products having one attribute name/value pair. */
    public static class AttributeCount {
        private String name;
        private String value;
        private long count;

        public String getName() { return name; }
        public void setName(String name) { this.name = name; }

        public String getValue() { return value; }
        public void setValue(String value) { this.value = value; }

        public long getCount() { return count; }
        public void setCount(long count) { this.count = count; }
    }
}
复制代码- @Service
- public class ElasticsearchProductSearchService implements ProductSearchService {
-
- private final RestHighLevelClient client;
- private final ProductRepository productRepository;
-
- public ElasticsearchProductSearchService(ProductRepository productRepository) {
- this.productRepository = productRepository;
- this.client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("localhost", 9200, "http")));
- }
-
- @Override
- public SearchResult<Product> searchByKeyword(String keyword, int page, int size) {
- SearchCriteria criteria = new SearchCriteria();
- criteria.setKeyword(keyword);
- return advancedSearch(criteria, page, size);
- }
-
- @Override
- public SearchResult<Product> advancedSearch(SearchCriteria criteria, int page, int size) {
- try {
- SearchRequest request = new SearchRequest("products");
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
- // 构建查询
- BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
- // 关键词搜索
- if (criteria.getKeyword() != null && !criteria.getKeyword().isEmpty()) {
- boolQuery.should(QueryBuilders.matchQuery("name", criteria.getKeyword()).boost(2.0f));
- boolQuery.should(QueryBuilders.matchQuery("description", criteria.getKeyword()));
- boolQuery.should(QueryBuilders.matchQuery("brand", criteria.getKeyword()));
- }
-
- // 分类过滤
- if (criteria.getCategory() != null && !criteria.getCategory().isEmpty()) {
- boolQuery.filter(QueryBuilders.termQuery("category", criteria.getCategory()));
- }
-
- // 价格范围过滤
- if (criteria.getMinPrice() != null || criteria.getMaxPrice() != null) {
- RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("price");
- if (criteria.getMinPrice() != null) {
- rangeQuery.gte(criteria.getMinPrice());
- }
- if (criteria.getMaxPrice() != null) {
- rangeQuery.lte(criteria.getMaxPrice());
- }
- boolQuery.filter(rangeQuery);
- }
-
- // 品牌过滤
- if (criteria.getBrands() != null && !criteria.getBrands().isEmpty()) {
- boolQuery.filter(QueryBuilders.termsQuery("brand", criteria.getBrands()));
- }
-
- sourceBuilder.query(boolQuery);
-
- // 设置分页
- sourceBuilder.from(page * size);
- sourceBuilder.size(size);
-
- // 设置排序
- if (criteria.getSortOption() != null) {
- switch (criteria.getSortOption()) {
- case PRICE_ASC:
- sourceBuilder.sort(SortBuilders.fieldSort("price").order(SortOrder.ASC));
- break;
- case PRICE_DESC:
- sourceBuilder.sort(SortBuilders.fieldSort("price").order(SortOrder.DESC));
- break;
- case NAME_ASC:
- sourceBuilder.sort(SortBuilders.fieldSort("name.keyword").order(SortOrder.ASC));
- break;
- case NAME_DESC:
- sourceBuilder.sort(SortBuilders.fieldSort("name.keyword").order(SortOrder.DESC));
- break;
- case NEWEST:
- sourceBuilder.sort(SortBuilders.fieldSort("createdAt").order(SortOrder.DESC));
- break;
- case RATING:
- sourceBuilder.sort(SortBuilders.fieldSort("rating").order(SortOrder.DESC));
- break;
- case RELEVANCE:
- default:
- // 默认按相关性排序
- break;
- }
- }
-
- request.source(sourceBuilder);
-
- // 执行搜索
- SearchResponse response = client.search(request, RequestOptions.DEFAULT);
-
- // 处理结果
- List<Product> products = new ArrayList<>();
- ObjectMapper mapper = new ObjectMapper();
-
- for (SearchHit hit : response.getHits().getHits()) {
- Product product = mapper.readValue(hit.getSourceAsString(), Product.class);
- products.add(product);
- }
-
- // 构建返回结果
- SearchResult<Product> result = new SearchResult<>();
- result.setContent(products);
- result.setPageNumber(page);
- result.setPageSize(size);
- result.setTotalElements(response.getHits().getTotalHits().value);
- result.setTotalPages((int) Math.ceil((double) response.getHits().getTotalHits().value / size));
-
- return result;
- } catch (IOException e) {
- throw new RuntimeException("搜索失败", e);
- }
- }
-
- @Override
- public List<String> getSuggestions(String prefix, int size) {
- try {
- SearchRequest request = new SearchRequest("products");
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
- // 使用Completion Suggester实现搜索建议
- SuggestionBuilder termSuggestionBuilder = SuggestBuilders.completionSuggestion("name_suggest")
- .prefix(prefix)
- .size(size);
-
- SuggestBuilder suggestBuilder = new SuggestBuilder();
- suggestBuilder.addSuggestion("name_suggest", termSuggestionBuilder);
-
- sourceBuilder.suggest(suggestBuilder);
- request.source(sourceBuilder);
-
- SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- Suggest suggest = response.getSuggest();
- CompletionSuggestion completionSuggestion = suggest.getSuggestion("name_suggest");
-
- List<String> suggestions = new ArrayList<>();
- for (CompletionSuggestion.Entry entry : completionSuggestion.getEntries()) {
- for (CompletionSuggestion.Entry.Option option : entry.getOptions()) {
- suggestions.add(option.getText().string());
- }
- }
-
- return suggestions;
- } catch (IOException e) {
- throw new RuntimeException("获取搜索建议失败", e);
- }
- }
-
- @Override
- public SearchAggregations getAggregations(SearchCriteria criteria) {
- try {
- SearchRequest request = new SearchRequest("products");
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
- // 构建查询
- BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
- // 关键词搜索
- if (criteria.getKeyword() != null && !criteria.getKeyword().isEmpty()) {
- boolQuery.should(QueryBuilders.matchQuery("name", criteria.getKeyword()));
- boolQuery.should(QueryBuilders.matchQuery("description", criteria.getKeyword()));
- }
-
- // 分类过滤
- if (criteria.getCategory() != null && !criteria.getCategory().isEmpty()) {
- boolQuery.filter(QueryBuilders.termQuery("category", criteria.getCategory()));
- }
-
- // 价格范围过滤
- if (criteria.getMinPrice() != null || criteria.getMaxPrice() != null) {
- RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("price");
- if (criteria.getMinPrice() != null) {
- rangeQuery.gte(criteria.getMinPrice());
- }
- if (criteria.getMaxPrice() != null) {
- rangeQuery.lte(criteria.getMaxPrice());
- }
- boolQuery.filter(rangeQuery);
- }
-
- sourceBuilder.query(boolQuery);
- sourceBuilder.size(0); // 不返回文档,只返回聚合结果
-
- // 添加分类聚合
- TermsAggregationBuilder categoryAggregation = AggregationBuilders.terms("categories")
- .field("category")
- .size(10);
-
- // 添加价格范围聚合
- RangeAggregationBuilder priceRangeAggregation = AggregationBuilders.range("price_ranges")
- .field("price")
- .addRange(0, 50)
- .addRange(50, 100)
- .addRange(100, 200)
- .addRange(200, 500)
- .addRange(500, Double.MAX_VALUE);
-
- // 添加品牌聚合
- TermsAggregationBuilder brandAggregation = AggregationBuilders.terms("brands")
- .field("brand")
- .size(10);
-
- sourceBuilder.aggregation(categoryAggregation);
- sourceBuilder.aggregation(priceRangeAggregation);
- sourceBuilder.aggregation(brandAggregation);
-
- request.source(sourceBuilder);
-
- // 执行搜索
- SearchResponse response = client.search(request, RequestOptions.DEFAULT);
-
- // 处理聚合结果
- SearchAggregations result = new SearchAggregations();
-
- // 处理分类聚合
- Terms categoriesAgg = response.getAggregations().get("categories");
- List<SearchAggregations.CategoryCount> categories = new ArrayList<>();
- for (Terms.Bucket bucket : categoriesAgg.getBuckets()) {
- SearchAggregations.CategoryCount categoryCount = new SearchAggregations.CategoryCount();
- categoryCount.setName(bucket.getKeyAsString());
- categoryCount.setCount(bucket.getDocCount());
- categories.add(categoryCount);
- }
- result.setCategories(categories);
-
- // 处理价格范围聚合
- Range priceRangesAgg = response.getAggregations().get("price_ranges");
- List<SearchAggregations.PriceRangeCount> priceRanges = new ArrayList<>();
- for (Range.Bucket bucket : priceRangesAgg.getBuckets()) {
- SearchAggregations.PriceRangeCount priceRangeCount = new SearchAggregations.PriceRangeCount();
- priceRangeCount.setFrom(bucket.getFrom() != null ? bucket.getFrom().doubleValue() : 0);
- priceRangeCount.setTo(bucket.getTo() != null ? bucket.getTo().doubleValue() : Double.MAX_VALUE);
- priceRangeCount.setCount(bucket.getDocCount());
- priceRanges.add(priceRangeCount);
- }
- result.setPriceRanges(priceRanges);
-
- // 处理品牌聚合
- Terms brandsAgg = response.getAggregations().get("brands");
- List<SearchAggregations.BrandCount> brands = new ArrayList<>();
- for (Terms.Bucket bucket : brandsAgg.getBuckets()) {
- SearchAggregations.BrandCount brandCount = new SearchAggregations.BrandCount();
- brandCount.setName(bucket.getKeyAsString());
- brandCount.setCount(bucket.getDocCount());
- brands.add(brandCount);
- }
- result.setBrands(brands);
-
- return result;
- } catch (IOException e) {
- throw new RuntimeException("获取聚合结果失败", e);
- }
- }
-
- @PreDestroy // Lifecycle callback: invoked by the container before this bean is discarded.
- public void close() { // Closes `client` so its underlying resources are released on shutdown.
- try {
- client.close();
- } catch (IOException e) {
- e.printStackTrace(); // NOTE(review): failure is only printed and never propagated; consider routing it to the project's logger.
- }
- }
- }
复制代码- /**
-  * REST endpoints for product search, autocomplete suggestions and facet aggregations.
-  *
-  * <p>Each handler binds request parameters, rejects obviously invalid paging values with
-  * HTTP 400, and delegates the actual work to {@code ProductSearchService}.
-  */
- @RestController
- @RequestMapping("/api/search")
- public class SearchController {
-
-     private final ProductSearchService searchService;
-
-     public SearchController(ProductSearchService searchService) {
-         this.searchService = searchService;
-     }
-
-     /**
-      * Searches products with optional keyword, category, price-range and brand filters.
-      *
-      * @param page zero-based page index; must not be negative
-      * @param size page size; must be at least 1
-      * @return 200 with the result of {@code advancedSearch}, or 400 when paging values are invalid
-      */
-     @GetMapping("/products")
-     public ResponseEntity<SearchResult<Product>> searchProducts(
-             @RequestParam(required = false) String keyword,
-             @RequestParam(required = false) String category,
-             @RequestParam(required = false) Double minPrice,
-             @RequestParam(required = false) Double maxPrice,
-             @RequestParam(required = false) List<String> brands,
-             @RequestParam(required = false) SearchCriteria.SortOption sort,
-             @RequestParam(defaultValue = "0") int page,
-             @RequestParam(defaultValue = "10") int size) {
-
-         // Paging values come straight from the query string; a negative offset or an
-         // empty page would otherwise be forwarded to the search backend unchanged.
-         if (page < 0 || size < 1) {
-             return ResponseEntity.badRequest().build();
-         }
-
-         SearchCriteria criteria = buildCriteria(keyword, category, minPrice, maxPrice, brands);
-         criteria.setSortOption(sort);
-
-         return ResponseEntity.ok(searchService.advancedSearch(criteria, page, size));
-     }
-
-     /**
-      * Returns autocomplete suggestions for the given prefix.
-      *
-      * @param prefix text typed so far (required request parameter)
-      * @param size   maximum number of suggestions requested; must be at least 1
-      * @return 200 with the suggestions from the service, or 400 when {@code size} is invalid
-      */
-     @GetMapping("/suggestions")
-     public ResponseEntity<List<String>> getSuggestions(
-             @RequestParam String prefix,
-             @RequestParam(defaultValue = "5") int size) {
-
-         if (size < 1) {
-             return ResponseEntity.badRequest().build();
-         }
-
-         return ResponseEntity.ok(searchService.getSuggestions(prefix, size));
-     }
-
-     /**
-      * Returns facet aggregations for the same filter set accepted by {@code /products}
-      * (without sorting or paging).
-      */
-     @GetMapping("/aggregations")
-     public ResponseEntity<SearchAggregations> getAggregations(
-             @RequestParam(required = false) String keyword,
-             @RequestParam(required = false) String category,
-             @RequestParam(required = false) Double minPrice,
-             @RequestParam(required = false) Double maxPrice,
-             @RequestParam(required = false) List<String> brands) {
-
-         SearchCriteria criteria = buildCriteria(keyword, category, minPrice, maxPrice, brands);
-         return ResponseEntity.ok(searchService.getAggregations(criteria));
-     }
-
-     /** Copies the filter parameters shared by the search and aggregation endpoints into a criteria object. */
-     private SearchCriteria buildCriteria(String keyword, String category,
-                                          Double minPrice, Double maxPrice, List<String> brands) {
-         SearchCriteria criteria = new SearchCriteria();
-         criteria.setKeyword(keyword);
-         criteria.setCategory(category);
-         criteria.setMinPrice(minPrice);
-         criteria.setMaxPrice(maxPrice);
-         criteria.setBrands(brands);
-         return criteria;
-     }
- }
复制代码
最佳实践和注意事项
索引设计最佳实践
1. 选择合适的字段类型:根据数据特性选择合适的字段类型,如文本、数值、日期等
2. 合理设置分析器:根据语言和搜索需求选择合适的分析器
3. 避免过度索引:只对需要搜索的字段建立索引,减少资源消耗
4. 使用复合索引:对经常一起查询的字段创建复合索引
5. 定期维护索引:定期优化和重建索引,保持搜索性能
- // Example Elasticsearch index mapping for the "products" index
- PUT /products
- {
- "mappings": {
- "properties": {
- "id": {
- "type": "keyword"
- },
- "name": {
- "type": "text",
- "analyzer": "standard",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
- }
- }
- },
- "description": {
- "type": "text",
- "analyzer": "standard"
- },
- "category": {
- "type": "keyword"
- },
- "price": {
- "type": "double"
- },
- "brand": {
- "type": "keyword"
- },
- "attributes": {
- "type": "nested",
- "properties": {
- "name": {
- "type": "keyword"
- },
- "value": {
- "type": "keyword"
- }
- }
- },
- "createdAt": {
- "type": "date"
- },
- "rating": {
- "type": "double"
- },
- "name_suggest": {
- "type": "completion"
- }
- }
- }
- }
复制代码
查询优化最佳实践
1. 避免使用通配符开头的查询:如*keyword,这类查询无法利用词项前缀快速定位,需要遍历索引中的大量词项,性能很差
2. 使用过滤器而非查询:对于不需要计算相关性的条件,使用过滤器提高性能
3. 限制返回字段:只返回需要的字段,减少数据传输量
4. 合理使用分页:避免深度分页,使用游标式分页(如Elasticsearch的search_after)替代
5. 缓存常用查询结果:对高频查询结果进行缓存
- // 优化查询示例
- public List<Product> optimizedSearch(String keyword, String category, int page, int size) {
- BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
- // 使用must而非should确保相关性
- if (keyword != null && !keyword.isEmpty()) {
- boolQuery.must(QueryBuilders.matchQuery("name", keyword));
- }
-
- // 使用filter而非must提高性能
- if (category != null && !category.isEmpty()) {
- boolQuery.filter(QueryBuilders.termQuery("category", category));
- }
-
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
- .query(boolQuery)
- .from(page * size)
- .size(size)
- .fetchSource(new String[]{"id", "name", "price", "image"}, null); // 只返回需要的字段
-
- // 执行查询...
- }
复制代码
性能监控与调优
1. 监控搜索响应时间:设置性能监控,及时发现性能问题
2. 分析慢查询:定期分析慢查询,优化索引和查询
3. 合理分配资源:根据负载情况调整搜索服务资源
4. 使用性能分析工具:如Elasticsearch的Profile API
- /**
-  * Runs a match query on the "name" field of the "products" index with the Profile API
-  * enabled and prints the profiling tree of every shard that answered.
-  *
-  * NOTE(review): accessor names follow the 7.x high-level REST client profile classes
-  * (getQueryProfileResults / getQueryResults / getRewriteTime); confirm against the
-  * client version the project actually uses.
-  *
-  * @param keyword text to match against the "name" field
-  * @throws RuntimeException if the search request fails with an I/O error
-  */
- public void analyzeQueryPerformance(String keyword) {
-     SearchRequest request = new SearchRequest("products");
-     SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
-     sourceBuilder.query(QueryBuilders.matchQuery("name", keyword));
-     sourceBuilder.profile(true); // enable profiling for this request
-
-     request.source(sourceBuilder);
-
-     try {
-         SearchResponse response = client.search(request, RequestOptions.DEFAULT);
-
-         // Profile results are a map keyed by shard identifier, so a positional get(0)
-         // never finds an entry; walk every shard's result instead.
-         for (ProfileShardResult profileShardResult : response.getProfileResults().values()) {
-             for (QueryProfileShardResult queryProfileResult : profileShardResult.getQueryProfileResults()) {
-                 // Time spent rewriting the query, reported in nanoseconds
-                 System.out.println("Rewrite time: " + queryProfileResult.getRewriteTime() + "ns");
-
-                 // Each root entry is the top of a tree of profiled query components
-                 for (ProfileResult profileResult : queryProfileResult.getQueryResults()) {
-                     printProfileResult(profileResult, 0);
-                 }
-             }
-         }
-     } catch (IOException e) {
-         // Same convention as getAggregations: wrap and rethrow, keeping the cause
-         throw new RuntimeException("查询性能分析失败", e);
-     }
- }
- /**
-  * Recursively prints one node of a query profile tree and all of its children.
-  *
-  * NOTE(review): uses the 7.x ProfileResult accessors (getQueryName, getLuceneDescription,
-  * getProfiledChildren); confirm against the client version in use.
-  *
-  * @param profileResult node to print
-  * @param indent        nesting depth; one space of indentation is emitted per level
-  */
- private void printProfileResult(ProfileResult profileResult, int indent) {
-     String indentStr = " ".repeat(indent);
-     System.out.println(indentStr + "Type: " + profileResult.getQueryName());
-     // getTime() reports nanoseconds, so label it accordingly
-     System.out.println(indentStr + "Time: " + profileResult.getTime() + "ns");
-     System.out.println(indentStr + "Description: " + profileResult.getLuceneDescription());
-
-     for (ProfileResult child : profileResult.getProfiledChildren()) {
-         printProfileResult(child, indent + 1);
-     }
- }
复制代码
搜索结果排序与相关性优化
1. 使用相关性评分:利用搜索引擎内置的相关性评分机制
2. 自定义评分:根据业务需求自定义评分规则
3. 使用boost调整权重:对不同字段设置不同权重
4. 结合业务规则排序:如新品优先、热销优先等
- /**
-  * Custom scoring example: a keyword/category bool query wrapped in a function_score
-  * query that adds recency, rating and stock signals.
-  *
-  * The function scores are summed (score mode SUM) and the sum is multiplied with the
-  * query score (boost mode MULTIPLY). Request execution and hit mapping are elided.
-  *
-  * @param keyword  optional text matched against "name" (boost 2.0) and "description" (boost 1.0)
-  * @param category optional exact category filter
-  */
- public List<Product> searchWithCustomScoring(String keyword, String category) {
-     BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
-
-     // Base query
-     if (keyword != null && !keyword.isEmpty()) {
-         boolQuery.should(QueryBuilders.matchQuery("name", keyword).boost(2.0f));
-         boolQuery.should(QueryBuilders.matchQuery("description", keyword).boost(1.0f));
-         // Once a filter clause is present, should clauses become optional by default
-         // (minimum_should_match = 0). Require one so the keyword restricts the hits.
-         boolQuery.minimumShouldMatch(1);
-     }
-
-     if (category != null && !category.isEmpty()) {
-         boolQuery.filter(QueryBuilders.termQuery("category", category));
-     }
-
-     // Custom scoring through a function_score query
-     FunctionScoreQueryBuilder functionScoreQuery = QueryBuilders.functionScoreQuery(
-         boolQuery,
-         new FunctionScoreQueryBuilder.FilterFunctionBuilder[] {
-             // Favor recent products: exponential decay on createdAt (origin now, scale 30d, offset 7d)
-             new FunctionScoreQueryBuilder.FilterFunctionBuilder(
-                 QueryBuilders.matchAllQuery(),
-                 ScoreFunctionBuilders.exponentialDecayFunction("createdAt", "now", "30d", "7d")
-             ),
-             // Favor highly rated products: ln(1 + rating), treating a missing rating as 0
-             new FunctionScoreQueryBuilder.FilterFunctionBuilder(
-                 QueryBuilders.matchAllQuery(),
-                 ScoreFunctionBuilders.fieldValueFactorFunction("rating")
-                     // The modifier enum is nested in FieldValueFactorFunction; fully qualified
-                     // here so the snippet needs no extra import.
-                     .modifier(org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction.Modifier.LN1P)
-                     .missing(0)
-             ),
-             // Favor products with more than 10 units in stock
-             // NOTE(review): "stock" is not declared in the example index mapping - confirm the field exists.
-             new FunctionScoreQueryBuilder.FilterFunctionBuilder(
-                 QueryBuilders.rangeQuery("stock").gt(10),
-                 ScoreFunctionBuilders.weightFactorFunction(1.5f)
-             )
-         }
-     ).boostMode(CombineFunction.MULTIPLY).scoreMode(FunctionScoreQuery.ScoreMode.SUM);
-
-     SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
-         .query(functionScoreQuery);
-
-     // Execute the query...
- }
复制代码
结论
在Java项目中实现高效搜索功能需要综合考虑多种因素,包括数据规模、性能需求、功能复杂度等。本文详细介绍了从基础数据结构搜索到高级全文搜索引擎的各种技术方法,并通过实际案例展示了如何构建完整的搜索解决方案。
对于小型应用,可以使用Java内置数据结构或数据库查询实现基本搜索功能;对于中型应用,可以集成Lucene或Hibernate Search提供更强大的搜索能力;对于大型分布式应用,Elasticsearch则是最佳选择,它提供了高性能、可扩展的搜索解决方案。
无论选择哪种技术,都需要关注索引设计、查询优化、性能监控和结果相关性等关键方面,以构建满足用户需求的高效搜索系统。通过合理应用本文介绍的技术和方法,开发者可以在Java项目中实现快速、准确、用户友好的搜索功能,提升应用的整体用户体验。
版权声明
1、转载或引用本网站内容(如何在Java项目中高效实现搜索功能核心技术与方法解析)须注明原网址及作者(威震华夏关云长),并标明本网站网址(https://www.pixtech.cc/)。
2、对于不当转载或引用本网站内容而引起的民事纷争、行政处理或其他损失,本网站不承担责任。
3、对不遵守本声明或其他违法、恶意使用本网站内容者,本网站保留追究其法律责任的权利。
本文地址: https://www.pixtech.cc/thread-34374-1-1.html
|
|