gitea/modules/indexer/issues/bleve.go

281 lines
7.7 KiB
Go
Raw Normal View History

2019-02-19 22:39:39 +08:00
// Copyright 2018 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package issues
import (
"context"
2019-02-19 22:39:39 +08:00
"fmt"
"os"
"strconv"
gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search/query"
2019-02-19 22:39:39 +08:00
"github.com/ethantkoenig/rupture"
)
const (
issueIndexerAnalyzer = "issueIndexer"
issueIndexerDocType = "issueIndexerDocType"
issueIndexerLatestVersion = 1
)
// indexerID a bleve-compatible unique identifier for an integer id
func indexerID(id int64) string {
return strconv.FormatInt(id, 36)
}
// idOfIndexerID the integer id associated with an indexer id
func idOfIndexerID(indexerID string) (int64, error) {
id, err := strconv.ParseInt(indexerID, 36, 64)
if err != nil {
return 0, fmt.Errorf("Unexpected indexer ID %s: %w", indexerID, err)
2019-02-19 22:39:39 +08:00
}
return id, nil
}
// numericEqualityQuery a numeric equality query for the given value and field
func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
f := float64(value)
tru := true
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
q.SetField(field)
return q
}
func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)
q.FieldVal = field
q.Analyzer = analyzer
return q
}
const unicodeNormalizeName = "unicodeNormalize"
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]interface{}{
"type": unicodenorm.Name,
"form": unicodenorm.NFC,
})
}
const maxBatchSize = 16
// openIndexer open the index at the specified path, checking for metadata
// updates and bleve version updates. If index needs to be created (or
// re-created), returns (nil, nil)
func openIndexer(path string, latestVersion int) (bleve.Index, error) {
_, err := os.Stat(path)
if err != nil && os.IsNotExist(err) {
return nil, nil
} else if err != nil {
return nil, err
}
metadata, err := rupture.ReadIndexMetadata(path)
if err != nil {
return nil, err
}
if metadata.Version < latestVersion {
// the indexer is using a previous version, so we should delete it and
// re-populate
return nil, util.RemoveAll(path)
2019-02-19 22:39:39 +08:00
}
index, err := bleve.Open(path)
if err != nil && err == upsidedown.IncompatibleVersion {
// the indexer was built with a previous version of bleve, so we should
// delete it and re-populate
return nil, util.RemoveAll(path)
2019-02-19 22:39:39 +08:00
} else if err != nil {
return nil, err
}
return index, nil
}
// BleveIndexerData an update to the issue indexer
type BleveIndexerData IndexerData
// Type returns the document type, for bleve's mapping.Classifier interface.
func (i *BleveIndexerData) Type() string {
return issueIndexerDocType
}
// createIssueIndexer create an issue indexer if one does not already exist
func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) {
mapping := bleve.NewIndexMapping()
docMapping := bleve.NewDocumentMapping()
numericFieldMapping := bleve.NewNumericFieldMapping()
numericFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Store = false
textFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
return nil, err
} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, lowercase.Name},
}); err != nil {
return nil, err
}
mapping.DefaultAnalyzer = issueIndexerAnalyzer
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
index, err := bleve.New(path, mapping)
if err != nil {
return nil, err
}
if err = rupture.WriteIndexMetadata(path, &rupture.IndexMetadata{
Version: latestVersion,
}); err != nil {
return nil, err
}
return index, nil
}
var _ Indexer = &BleveIndexer{}
2019-02-19 22:39:39 +08:00
// BleveIndexer implements Indexer interface
type BleveIndexer struct {
indexDir string
indexer bleve.Index
}
// NewBleveIndexer creates a new bleve local indexer
func NewBleveIndexer(indexDir string) *BleveIndexer {
return &BleveIndexer{
indexDir: indexDir,
}
}
// Init will initialize the indexer
2019-02-19 22:39:39 +08:00
func (b *BleveIndexer) Init() (bool, error) {
var err error
b.indexer, err = openIndexer(b.indexDir, issueIndexerLatestVersion)
if err != nil {
return false, err
}
if b.indexer != nil {
return true, nil
}
b.indexer, err = createIssueIndexer(b.indexDir, issueIndexerLatestVersion)
return false, err
}
// SetAvailabilityChangeCallback does nothing
func (b *BleveIndexer) SetAvailabilityChangeCallback(callback func(bool)) {
}
// Ping does nothing
func (b *BleveIndexer) Ping() bool {
return true
}
// Close will close the bleve indexer
func (b *BleveIndexer) Close() {
if b.indexer != nil {
if err := b.indexer.Close(); err != nil {
log.Error("Error whilst closing indexer: %v", err)
}
}
}
2019-02-19 22:39:39 +08:00
// Index will save the index data
func (b *BleveIndexer) Index(issues []*IndexerData) error {
batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize)
2019-02-19 22:39:39 +08:00
for _, issue := range issues {
if err := batch.Index(indexerID(issue.ID), struct {
RepoID int64
Title string
Content string
Comments []string
}{
RepoID: issue.RepoID,
Title: issue.Title,
Content: issue.Content,
Comments: issue.Comments,
}); err != nil {
return err
}
}
return batch.Flush()
}
// Delete deletes indexes by ids
func (b *BleveIndexer) Delete(ids ...int64) error {
batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize)
2019-02-19 22:39:39 +08:00
for _, id := range ids {
if err := batch.Delete(indexerID(id)); err != nil {
return err
}
}
return batch.Flush()
}
// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) {
Allow cross-repository dependencies on issues (#7901) * in progress changes for #7405, added ability to add cross-repo dependencies * removed unused repolink var * fixed query that was breaking ci tests; fixed check in issue dependency add so that the id of the issue and dependency is checked rather than the indexes * reverted removal of string in local files becasue these are done via crowdin, not updated manually * removed 'Select("issue.*")' from getBlockedByDependencies and getBlockingDependencies based on comments in PR review * changed getBlockedByDependencies and getBlockingDependencies to use a more xorm-like query, also updated the sidebar as a result * simplified the getBlockingDependencies and getBlockedByDependencies methods; changed the sidebar to show the dependencies in a different format where you can see the name of the repository * made some changes to the issue view in the dependencies (issue name on top, repo full name on separate line). Change view of issue in the dependency search results (also showing the full repo name on separate line) * replace call to FindUserAccessibleRepoIDs with SearchRepositoryByName. The former was hardcoded to use isPrivate = false on the repo search, but this code needed it to be true. The SearchRepositoryByName method is used more in the code including on the user's dashboard * some more tweaks to the layout of the issues when showing dependencies and in the search box when you add new dependencies * added Name to the RepositoryMeta struct * updated swagger doc * fixed total count for link header on SearchIssues * fixed indentation * fixed aligment of remove icon on dependencies in issue sidebar * removed unnecessary nil check (unnecessary because issue.loadRepo is called prior to this block) * reverting .css change, somehow missed or forgot that less is used * updated less file and generated css; updated sidebar template with styles to line up delete and issue index * added ordering to the blocked by/depends on queries * fixed sorting in issue dependency search and the depends on/blocks views to show issues from the current repo first, then by created date descending; added a "all cross repository dependencies" setting to allow this feature to be turned off, if turned off, the issue dependency search will work the way it did before (restricted to the current repository) * re-applied my swagger changes after merge * fixed split string condition in issue search * changed ALLOW_CROSS_REPOSITORY_DEPENDENCIES description to sound more global than just the issue dependency search; returning 400 in the cross repo issue search api method if not enabled; fixed bug where the issue count did not respect the state parameter * when adding a dependency to an issue, added a check to make sure the issue and dependency are in the same repo if cross repo dependencies is not enabled * updated sortIssuesSession call in PullRequests, another commit moved this method from pull.go to pull_list.go so I had to re-apply my change here * fixed incorrect setting of user id parameter in search repos call
2019-10-31 13:06:10 +08:00
var repoQueriesP []*query.NumericRangeQuery
for _, repoID := range repoIDs {
repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID"))
}
repoQueries := make([]query.Query, len(repoQueriesP))
for i, v := range repoQueriesP {
repoQueries[i] = query.Query(v)
}
2019-02-19 22:39:39 +08:00
indexerQuery := bleve.NewConjunctionQuery(
Allow cross-repository dependencies on issues (#7901) * in progress changes for #7405, added ability to add cross-repo dependencies * removed unused repolink var * fixed query that was breaking ci tests; fixed check in issue dependency add so that the id of the issue and dependency is checked rather than the indexes * reverted removal of string in local files becasue these are done via crowdin, not updated manually * removed 'Select("issue.*")' from getBlockedByDependencies and getBlockingDependencies based on comments in PR review * changed getBlockedByDependencies and getBlockingDependencies to use a more xorm-like query, also updated the sidebar as a result * simplified the getBlockingDependencies and getBlockedByDependencies methods; changed the sidebar to show the dependencies in a different format where you can see the name of the repository * made some changes to the issue view in the dependencies (issue name on top, repo full name on separate line). Change view of issue in the dependency search results (also showing the full repo name on separate line) * replace call to FindUserAccessibleRepoIDs with SearchRepositoryByName. The former was hardcoded to use isPrivate = false on the repo search, but this code needed it to be true. The SearchRepositoryByName method is used more in the code including on the user's dashboard * some more tweaks to the layout of the issues when showing dependencies and in the search box when you add new dependencies * added Name to the RepositoryMeta struct * updated swagger doc * fixed total count for link header on SearchIssues * fixed indentation * fixed aligment of remove icon on dependencies in issue sidebar * removed unnecessary nil check (unnecessary because issue.loadRepo is called prior to this block) * reverting .css change, somehow missed or forgot that less is used * updated less file and generated css; updated sidebar template with styles to line up delete and issue index * added ordering to the blocked by/depends on queries * fixed sorting in issue dependency search and the depends on/blocks views to show issues from the current repo first, then by created date descending; added a "all cross repository dependencies" setting to allow this feature to be turned off, if turned off, the issue dependency search will work the way it did before (restricted to the current repository) * re-applied my swagger changes after merge * fixed split string condition in issue search * changed ALLOW_CROSS_REPOSITORY_DEPENDENCIES description to sound more global than just the issue dependency search; returning 400 in the cross repo issue search api method if not enabled; fixed bug where the issue count did not respect the state parameter * when adding a dependency to an issue, added a check to make sure the issue and dependency are in the same repo if cross repo dependencies is not enabled * updated sortIssuesSession call in PullRequests, another commit moved this method from pull.go to pull_list.go so I had to re-apply my change here * fixed incorrect setting of user id parameter in search repos call
2019-10-31 13:06:10 +08:00
bleve.NewDisjunctionQuery(repoQueries...),
2019-02-19 22:39:39 +08:00
bleve.NewDisjunctionQuery(
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
))
search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
search.SortBy([]string{"-_score"})
2019-02-19 22:39:39 +08:00
result, err := b.indexer.SearchInContext(ctx, search)
2019-02-19 22:39:39 +08:00
if err != nil {
return nil, err
}
ret := SearchResult{
2019-02-19 22:39:39 +08:00
Hits: make([]Match, 0, len(result.Hits)),
}
for _, hit := range result.Hits {
id, err := idOfIndexerID(hit.ID)
if err != nil {
return nil, err
}
ret.Hits = append(ret.Hits, Match{
Allow cross-repository dependencies on issues (#7901) * in progress changes for #7405, added ability to add cross-repo dependencies * removed unused repolink var * fixed query that was breaking ci tests; fixed check in issue dependency add so that the id of the issue and dependency is checked rather than the indexes * reverted removal of string in local files becasue these are done via crowdin, not updated manually * removed 'Select("issue.*")' from getBlockedByDependencies and getBlockingDependencies based on comments in PR review * changed getBlockedByDependencies and getBlockingDependencies to use a more xorm-like query, also updated the sidebar as a result * simplified the getBlockingDependencies and getBlockedByDependencies methods; changed the sidebar to show the dependencies in a different format where you can see the name of the repository * made some changes to the issue view in the dependencies (issue name on top, repo full name on separate line). Change view of issue in the dependency search results (also showing the full repo name on separate line) * replace call to FindUserAccessibleRepoIDs with SearchRepositoryByName. The former was hardcoded to use isPrivate = false on the repo search, but this code needed it to be true. The SearchRepositoryByName method is used more in the code including on the user's dashboard * some more tweaks to the layout of the issues when showing dependencies and in the search box when you add new dependencies * added Name to the RepositoryMeta struct * updated swagger doc * fixed total count for link header on SearchIssues * fixed indentation * fixed aligment of remove icon on dependencies in issue sidebar * removed unnecessary nil check (unnecessary because issue.loadRepo is called prior to this block) * reverting .css change, somehow missed or forgot that less is used * updated less file and generated css; updated sidebar template with styles to line up delete and issue index * added ordering to the blocked by/depends on queries * fixed sorting in issue dependency search and the depends on/blocks views to show issues from the current repo first, then by created date descending; added a "all cross repository dependencies" setting to allow this feature to be turned off, if turned off, the issue dependency search will work the way it did before (restricted to the current repository) * re-applied my swagger changes after merge * fixed split string condition in issue search * changed ALLOW_CROSS_REPOSITORY_DEPENDENCIES description to sound more global than just the issue dependency search; returning 400 in the cross repo issue search api method if not enabled; fixed bug where the issue count did not respect the state parameter * when adding a dependency to an issue, added a check to make sure the issue and dependency are in the same repo if cross repo dependencies is not enabled * updated sortIssuesSession call in PullRequests, another commit moved this method from pull.go to pull_list.go so I had to re-apply my change here * fixed incorrect setting of user id parameter in search repos call
2019-10-31 13:06:10 +08:00
ID: id,
2019-02-19 22:39:39 +08:00
})
}
return &ret, nil
}