PageRenderTime 54ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/vendor/google.golang.org/appengine/search/search.go

http://github.com/garyburd/gopkgdoc
Go | 1189 lines | 881 code | 110 blank | 198 comment | 225 complexity | 102850a8ce046c228d49e7798d8c7ee3 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, MPL-2.0-no-copyleft-exception, BSD-2-Clause
  1. // Copyright 2012 Google Inc. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package search // import "google.golang.org/appengine/search"
  5. // TODO: let Put specify the document language: "en", "fr", etc. Also: order_id?? storage??
  6. // TODO: Index.GetAll (or Iterator.GetAll)?
  7. // TODO: struct <-> protobuf tests.
  8. // TODO: enforce Python's MIN_NUMBER_VALUE and MIN_DATE (which would disallow a zero
  9. // time.Time)? _MAXIMUM_STRING_LENGTH?
  10. import (
  11. "errors"
  12. "fmt"
  13. "math"
  14. "reflect"
  15. "regexp"
  16. "strconv"
  17. "strings"
  18. "time"
  19. "unicode/utf8"
  20. "github.com/golang/protobuf/proto"
  21. "golang.org/x/net/context"
  22. "google.golang.org/appengine"
  23. "google.golang.org/appengine/internal"
  24. pb "google.golang.org/appengine/internal/search"
  25. )
  // maxDocumentsPerPutDelete is the largest number of documents that a single
  // PutMulti or DeleteMulti call accepts.
  const maxDocumentsPerPutDelete = 200

  var (
  	// ErrInvalidDocumentType is the error for a dst or src argument of an
  	// unsupported type passed to methods such as Put, Get or Next.
  	ErrInvalidDocumentType = errors.New("search: invalid document type")

  	// ErrNoSuchDocument is the error for a lookup by ID that found no document.
  	ErrNoSuchDocument = errors.New("search: no such document")

  	// ErrTooManyDocuments is the error for a PutMulti or DeleteMulti call that
  	// was given more than maxDocumentsPerPutDelete documents.
  	ErrTooManyDocuments = fmt.Errorf("search: too many documents given to put or delete (max is %d)", maxDocumentsPerPutDelete)
  )
  type (
  	// Atom is a document field value that is indexed as one indivisible
  	// string.
  	Atom string

  	// HTML is a document field value that is indexed as HTML. Only the text
  	// nodes are indexed: "foo<b>bar" will be treated as "foobar".
  	HTML string
  )
  // validIndexNameOrDocID mirrors Python's
  // _ValidateVisiblePrintableAsciiNotReserved: it reports whether s does not
  // begin with "!" and consists solely of characters in the range 0x21-0x7e.
  // The empty string is accepted.
  func validIndexNameOrDocID(s string) bool {
  	if strings.HasPrefix(s, "!") {
  		return false
  	}
  	// Anything outside the visible, printable ASCII range disqualifies s.
  	outOfRange := func(r rune) bool { return r < 0x21 || r >= 0x7f }
  	return strings.IndexFunc(s, outOfRange) < 0
  }
  var (
  	// fieldNameRE matches a name that starts with an ASCII letter and
  	// continues with ASCII letters, digits or underscores.
  	fieldNameRE = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`)

  	// languageRE matches exactly two lower-case ASCII letters.
  	languageRE = regexp.MustCompile(`^[a-z]{2}$`)
  )
  60. // validFieldName is the Go equivalent of Python's _CheckFieldName. It checks
  61. // the validity of both field and facet names.
  62. func validFieldName(s string) bool {
  63. return len(s) <= 500 && fieldNameRE.MatchString(s)
  64. }
  // validDocRank reports whether the rank r lies in the range [0, 2^31).
  func validDocRank(r int) bool {
  	const maxRank = 1<<31 - 1
  	if r < 0 {
  		return false
  	}
  	return r <= maxRank
  }
  69. // validLanguage checks that a language looks like ISO 639-1.
  70. func validLanguage(s string) bool {
  71. return languageRE.MatchString(s)
  72. }
  // validFloat reports whether f lies in the closed range
  // [-2147483647, 2147483647]. NaN is never valid.
  func validFloat(f float64) bool {
  	const limit = 1<<31 - 1
  	return f >= -limit && f <= limit
  }
  77. // Index is an index of documents.
  78. type Index struct {
  79. spec pb.IndexSpec
  80. }
  // orderIDEpoch is the reference instant (midnight UTC on 1 January 2011)
  // from which the default OrderId of a document is measured.
  var orderIDEpoch = time.Date(2011, time.January, 1, 0, 0, 0, 0, time.UTC)
  83. // Open opens the index with the given name. The index is created if it does
  84. // not already exist.
  85. //
  86. // The name is a human-readable ASCII string. It must contain no whitespace
  87. // characters and not start with "!".
  88. func Open(name string) (*Index, error) {
  89. if !validIndexNameOrDocID(name) {
  90. return nil, fmt.Errorf("search: invalid index name %q", name)
  91. }
  92. return &Index{
  93. spec: pb.IndexSpec{
  94. Name: &name,
  95. },
  96. }, nil
  97. }
  98. // Put saves src to the index. If id is empty, a new ID is allocated by the
  99. // service and returned. If id is not empty, any existing index entry for that
  100. // ID is replaced.
  101. //
  102. // The ID is a human-readable ASCII string. It must contain no whitespace
  103. // characters and not start with "!".
  104. //
  105. // src must be a non-nil struct pointer or implement the FieldLoadSaver
  106. // interface.
  107. func (x *Index) Put(c context.Context, id string, src interface{}) (string, error) {
  108. ids, err := x.PutMulti(c, []string{id}, []interface{}{src})
  109. if err != nil {
  110. return "", err
  111. }
  112. return ids[0], nil
  113. }
  114. // PutMulti is like Put, but is more efficient for adding multiple documents to
  115. // the index at once.
  116. //
  117. // Up to 200 documents can be added at once. ErrTooManyDocuments is returned if
  118. // you try to add more.
  119. //
  120. // ids can either be an empty slice (which means new IDs will be allocated for
  121. // each of the documents added) or a slice the same size as srcs.
  122. //
  123. // The error may be an instance of appengine.MultiError, in which case it will
  124. // be the same size as srcs and the individual errors inside will correspond
  125. // with the items in srcs.
  126. func (x *Index) PutMulti(c context.Context, ids []string, srcs []interface{}) ([]string, error) {
  127. if len(ids) != 0 && len(srcs) != len(ids) {
  128. return nil, fmt.Errorf("search: PutMulti expects ids and srcs slices of the same length")
  129. }
  130. if len(srcs) > maxDocumentsPerPutDelete {
  131. return nil, ErrTooManyDocuments
  132. }
  133. docs := make([]*pb.Document, len(srcs))
  134. for i, s := range srcs {
  135. var err error
  136. docs[i], err = saveDoc(s)
  137. if err != nil {
  138. return nil, err
  139. }
  140. if len(ids) != 0 && ids[i] != "" {
  141. if !validIndexNameOrDocID(ids[i]) {
  142. return nil, fmt.Errorf("search: invalid ID %q", ids[i])
  143. }
  144. docs[i].Id = proto.String(ids[i])
  145. }
  146. }
  147. // spec is modified by Call when applying the current Namespace, so copy it to
  148. // avoid retaining the namespace beyond the scope of the Call.
  149. spec := x.spec
  150. req := &pb.IndexDocumentRequest{
  151. Params: &pb.IndexDocumentParams{
  152. Document: docs,
  153. IndexSpec: &spec,
  154. },
  155. }
  156. res := &pb.IndexDocumentResponse{}
  157. if err := internal.Call(c, "search", "IndexDocument", req, res); err != nil {
  158. return nil, err
  159. }
  160. multiErr, hasErr := make(appengine.MultiError, len(res.Status)), false
  161. for i, s := range res.Status {
  162. if s.GetCode() != pb.SearchServiceError_OK {
  163. multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
  164. hasErr = true
  165. }
  166. }
  167. if hasErr {
  168. return res.DocId, multiErr
  169. }
  170. if len(res.Status) != len(docs) || len(res.DocId) != len(docs) {
  171. return nil, fmt.Errorf("search: internal error: wrong number of results (%d Statuses, %d DocIDs, expected %d)",
  172. len(res.Status), len(res.DocId), len(docs))
  173. }
  174. return res.DocId, nil
  175. }
  176. // Get loads the document with the given ID into dst.
  177. //
  178. // The ID is a human-readable ASCII string. It must be non-empty, contain no
  179. // whitespace characters and not start with "!".
  180. //
  181. // dst must be a non-nil struct pointer or implement the FieldLoadSaver
  182. // interface.
  183. //
  184. // ErrFieldMismatch is returned when a field is to be loaded into a different
  185. // type than the one it was stored from, or when a field is missing or
  186. // unexported in the destination struct. ErrFieldMismatch is only returned if
  187. // dst is a struct pointer. It is up to the callee to decide whether this error
  188. // is fatal, recoverable, or ignorable.
  189. func (x *Index) Get(c context.Context, id string, dst interface{}) error {
  190. if id == "" || !validIndexNameOrDocID(id) {
  191. return fmt.Errorf("search: invalid ID %q", id)
  192. }
  193. req := &pb.ListDocumentsRequest{
  194. Params: &pb.ListDocumentsParams{
  195. IndexSpec: &x.spec,
  196. StartDocId: proto.String(id),
  197. Limit: proto.Int32(1),
  198. },
  199. }
  200. res := &pb.ListDocumentsResponse{}
  201. if err := internal.Call(c, "search", "ListDocuments", req, res); err != nil {
  202. return err
  203. }
  204. if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
  205. return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
  206. }
  207. if len(res.Document) != 1 || res.Document[0].GetId() != id {
  208. return ErrNoSuchDocument
  209. }
  210. return loadDoc(dst, res.Document[0], nil)
  211. }
  212. // Delete deletes a document from the index.
  213. func (x *Index) Delete(c context.Context, id string) error {
  214. return x.DeleteMulti(c, []string{id})
  215. }
  216. // DeleteMulti deletes multiple documents from the index.
  217. //
  218. // The returned error may be an instance of appengine.MultiError, in which case
  219. // it will be the same size as srcs and the individual errors inside will
  220. // correspond with the items in srcs.
  221. func (x *Index) DeleteMulti(c context.Context, ids []string) error {
  222. if len(ids) > maxDocumentsPerPutDelete {
  223. return ErrTooManyDocuments
  224. }
  225. req := &pb.DeleteDocumentRequest{
  226. Params: &pb.DeleteDocumentParams{
  227. DocId: ids,
  228. IndexSpec: &x.spec,
  229. },
  230. }
  231. res := &pb.DeleteDocumentResponse{}
  232. if err := internal.Call(c, "search", "DeleteDocument", req, res); err != nil {
  233. return err
  234. }
  235. if len(res.Status) != len(ids) {
  236. return fmt.Errorf("search: internal error: wrong number of results (%d, expected %d)",
  237. len(res.Status), len(ids))
  238. }
  239. multiErr, hasErr := make(appengine.MultiError, len(ids)), false
  240. for i, s := range res.Status {
  241. if s.GetCode() != pb.SearchServiceError_OK {
  242. multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
  243. hasErr = true
  244. }
  245. }
  246. if hasErr {
  247. return multiErr
  248. }
  249. return nil
  250. }
  251. // List lists all of the documents in an index. The documents are returned in
  252. // increasing ID order.
  253. func (x *Index) List(c context.Context, opts *ListOptions) *Iterator {
  254. t := &Iterator{
  255. c: c,
  256. index: x,
  257. count: -1,
  258. listInclusive: true,
  259. more: moreList,
  260. }
  261. if opts != nil {
  262. t.listStartID = opts.StartID
  263. t.limit = opts.Limit
  264. t.idsOnly = opts.IDsOnly
  265. }
  266. return t
  267. }
  268. func moreList(t *Iterator) error {
  269. req := &pb.ListDocumentsRequest{
  270. Params: &pb.ListDocumentsParams{
  271. IndexSpec: &t.index.spec,
  272. },
  273. }
  274. if t.listStartID != "" {
  275. req.Params.StartDocId = &t.listStartID
  276. req.Params.IncludeStartDoc = &t.listInclusive
  277. }
  278. if t.limit > 0 {
  279. req.Params.Limit = proto.Int32(int32(t.limit))
  280. }
  281. if t.idsOnly {
  282. req.Params.KeysOnly = &t.idsOnly
  283. }
  284. res := &pb.ListDocumentsResponse{}
  285. if err := internal.Call(t.c, "search", "ListDocuments", req, res); err != nil {
  286. return err
  287. }
  288. if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
  289. return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
  290. }
  291. t.listRes = res.Document
  292. t.listStartID, t.listInclusive, t.more = "", false, nil
  293. if len(res.Document) != 0 && t.limit <= 0 {
  294. if id := res.Document[len(res.Document)-1].GetId(); id != "" {
  295. t.listStartID, t.more = id, moreList
  296. }
  297. }
  298. return nil
  299. }
  // ListOptions holds the options for listing the documents in an index. A nil
  // *ListOptions is equivalent to the default values.
  type ListOptions struct {
  	// StartID is the inclusive lower bound for the IDs of the returned
  	// documents. With the zero value all documents are returned.
  	StartID string

  	// Limit is the maximum number of documents to return. The zero value
  	// means there is no limit.
  	Limit int

  	// IDsOnly requests that only document IDs be returned by the list
  	// operation; no document fields are populated.
  	IDsOnly bool
  }
  313. // Search searches the index for the given query.
  314. func (x *Index) Search(c context.Context, query string, opts *SearchOptions) *Iterator {
  315. t := &Iterator{
  316. c: c,
  317. index: x,
  318. searchQuery: query,
  319. more: moreSearch,
  320. }
  321. if opts != nil {
  322. if opts.Cursor != "" {
  323. if opts.Offset != 0 {
  324. return errIter("at most one of Cursor and Offset may be specified")
  325. }
  326. t.searchCursor = proto.String(string(opts.Cursor))
  327. }
  328. t.limit = opts.Limit
  329. t.fields = opts.Fields
  330. t.idsOnly = opts.IDsOnly
  331. t.sort = opts.Sort
  332. t.exprs = opts.Expressions
  333. t.refinements = opts.Refinements
  334. t.facetOpts = opts.Facets
  335. t.searchOffset = opts.Offset
  336. t.countAccuracy = opts.CountAccuracy
  337. }
  338. return t
  339. }
  340. func moreSearch(t *Iterator) error {
  341. // We use per-result (rather than single/per-page) cursors since this
  342. // lets us return a Cursor for every iterator document. The two cursor
  343. // types are largely interchangeable: a page cursor is the same as the
  344. // last per-result cursor in a given search response.
  345. req := &pb.SearchRequest{
  346. Params: &pb.SearchParams{
  347. IndexSpec: &t.index.spec,
  348. Query: &t.searchQuery,
  349. Cursor: t.searchCursor,
  350. CursorType: pb.SearchParams_PER_RESULT.Enum(),
  351. FieldSpec: &pb.FieldSpec{
  352. Name: t.fields,
  353. },
  354. },
  355. }
  356. if t.limit > 0 {
  357. req.Params.Limit = proto.Int32(int32(t.limit))
  358. }
  359. if t.searchOffset > 0 {
  360. req.Params.Offset = proto.Int32(int32(t.searchOffset))
  361. t.searchOffset = 0
  362. }
  363. if t.countAccuracy > 0 {
  364. req.Params.MatchedCountAccuracy = proto.Int32(int32(t.countAccuracy))
  365. }
  366. if t.idsOnly {
  367. req.Params.KeysOnly = &t.idsOnly
  368. }
  369. if t.sort != nil {
  370. if err := sortToProto(t.sort, req.Params); err != nil {
  371. return err
  372. }
  373. }
  374. if t.refinements != nil {
  375. if err := refinementsToProto(t.refinements, req.Params); err != nil {
  376. return err
  377. }
  378. }
  379. for _, e := range t.exprs {
  380. req.Params.FieldSpec.Expression = append(req.Params.FieldSpec.Expression, &pb.FieldSpec_Expression{
  381. Name: proto.String(e.Name),
  382. Expression: proto.String(e.Expr),
  383. })
  384. }
  385. for _, f := range t.facetOpts {
  386. if err := f.setParams(req.Params); err != nil {
  387. return fmt.Errorf("bad FacetSearchOption: %v", err)
  388. }
  389. }
  390. // Don't repeat facet search.
  391. t.facetOpts = nil
  392. res := &pb.SearchResponse{}
  393. if err := internal.Call(t.c, "search", "Search", req, res); err != nil {
  394. return err
  395. }
  396. if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
  397. return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
  398. }
  399. t.searchRes = res.Result
  400. if len(res.FacetResult) > 0 {
  401. t.facetRes = res.FacetResult
  402. }
  403. t.count = int(*res.MatchedCount)
  404. if t.limit > 0 {
  405. t.more = nil
  406. } else {
  407. t.more = moreSearch
  408. }
  409. return nil
  410. }
  411. // SearchOptions are the options for searching an index. Passing a nil
  412. // *SearchOptions is equivalent to using the default values.
  413. type SearchOptions struct {
  414. // Limit is the maximum number of documents to return. The zero value
  415. // indicates no limit.
  416. Limit int
  417. // IDsOnly indicates that only document IDs should be returned for the search
  418. // operation; no document fields are populated.
  419. IDsOnly bool
  420. // Sort controls the ordering of search results.
  421. Sort *SortOptions
  422. // Fields specifies which document fields to include in the results. If omitted,
  423. // all document fields are returned. No more than 100 fields may be specified.
  424. Fields []string
  425. // Expressions specifies additional computed fields to add to each returned
  426. // document.
  427. Expressions []FieldExpression
  428. // Facets controls what facet information is returned for these search results.
  429. // If no options are specified, no facet results will be returned.
  430. Facets []FacetSearchOption
  431. // Refinements filters the returned documents by requiring them to contain facets
  432. // with specific values. Refinements are applied in conjunction for facets with
  433. // different names, and in disjunction otherwise.
  434. Refinements []Facet
  435. // Cursor causes the results to commence with the first document after
  436. // the document associated with the cursor.
  437. Cursor Cursor
  438. // Offset specifies the number of documents to skip over before returning results.
  439. // When specified, Cursor must be nil.
  440. Offset int
  441. // CountAccuracy specifies the maximum result count that can be expected to
  442. // be accurate. If zero, the count accuracy defaults to 20.
  443. CountAccuracy int
  444. }
  // Cursor represents a position within an iterator's results.
  //
  // A cursor's string value is web-safe, so it can be saved and later restored
  // for use in a new search.
  type Cursor string
  // FieldExpression describes a custom expression that is evaluated for each
  // search result.
  type FieldExpression struct {
  	// Name is the name given to the computed field.
  	Name string

  	// Expr is the expression evaluated to provide a custom content snippet
  	// for each document.
  	// See https://cloud.google.com/appengine/docs/standard/go/search/options for
  	// the supported expression syntax.
  	Expr string
  }
  459. // FacetSearchOption controls what facet information is returned in search results.
  460. type FacetSearchOption interface {
  461. setParams(*pb.SearchParams) error
  462. }
  463. // AutoFacetDiscovery returns a FacetSearchOption which enables automatic facet
  464. // discovery for the search. Automatic facet discovery looks for the facets
  465. // which appear the most often in the aggregate in the matched documents.
  466. //
  467. // The maximum number of facets returned is controlled by facetLimit, and the
  468. // maximum number of values per facet by facetLimit. A limit of zero indicates
  469. // a default limit should be used.
  470. func AutoFacetDiscovery(facetLimit, valueLimit int) FacetSearchOption {
  471. return &autoFacetOpt{facetLimit, valueLimit}
  472. }
  473. type autoFacetOpt struct {
  474. facetLimit, valueLimit int
  475. }
  476. const defaultAutoFacetLimit = 10 // As per python runtime search.py.
  477. func (o *autoFacetOpt) setParams(params *pb.SearchParams) error {
  478. lim := int32(o.facetLimit)
  479. if lim == 0 {
  480. lim = defaultAutoFacetLimit
  481. }
  482. params.AutoDiscoverFacetCount = &lim
  483. if o.valueLimit > 0 {
  484. params.FacetAutoDetectParam = &pb.FacetAutoDetectParam{
  485. ValueLimit: proto.Int32(int32(o.valueLimit)),
  486. }
  487. }
  488. return nil
  489. }
  490. // FacetDiscovery returns a FacetSearchOption which selects a facet to be
  491. // returned with the search results. By default, the most frequently
  492. // occurring values for that facet will be returned. However, you can also
  493. // specify a list of particular Atoms or specific Ranges to return.
  494. func FacetDiscovery(name string, value ...interface{}) FacetSearchOption {
  495. return &facetOpt{name, value}
  496. }
  497. type facetOpt struct {
  498. name string
  499. values []interface{}
  500. }
  501. func (o *facetOpt) setParams(params *pb.SearchParams) error {
  502. req := &pb.FacetRequest{Name: &o.name}
  503. params.IncludeFacet = append(params.IncludeFacet, req)
  504. if len(o.values) == 0 {
  505. return nil
  506. }
  507. vtype := reflect.TypeOf(o.values[0])
  508. reqParam := &pb.FacetRequestParam{}
  509. for _, v := range o.values {
  510. if reflect.TypeOf(v) != vtype {
  511. return errors.New("values must all be Atom, or must all be Range")
  512. }
  513. switch v := v.(type) {
  514. case Atom:
  515. reqParam.ValueConstraint = append(reqParam.ValueConstraint, string(v))
  516. case Range:
  517. rng, err := rangeToProto(v)
  518. if err != nil {
  519. return fmt.Errorf("invalid range: %v", err)
  520. }
  521. reqParam.Range = append(reqParam.Range, rng)
  522. default:
  523. return fmt.Errorf("unsupported value type %T", v)
  524. }
  525. }
  526. req.Params = reqParam
  527. return nil
  528. }
  529. // FacetDocumentDepth returns a FacetSearchOption which controls the number of
  530. // documents to be evaluated with preparing facet results.
  531. func FacetDocumentDepth(depth int) FacetSearchOption {
  532. return facetDepthOpt(depth)
  533. }
  534. type facetDepthOpt int
  535. func (o facetDepthOpt) setParams(params *pb.SearchParams) error {
  536. params.FacetDepth = proto.Int32(int32(o))
  537. return nil
  538. }
  539. // FacetResult represents the number of times a particular facet and value
  540. // appeared in the documents matching a search request.
  541. type FacetResult struct {
  542. Facet
  543. // Count is the number of times this specific facet and value appeared in the
  544. // matching documents.
  545. Count int
  546. }
  // Range is a numeric range with an inclusive start and an exclusive end.
  // Start may be math.Inf(-1) to indicate there is no minimum value, and End
  // may likewise be math.Inf(1) to indicate there is no maximum; at least one
  // of Start or End must be a finite number.
  type Range struct {
  	Start float64
  	End   float64
  }
  // negInf and posInf are the infinities that mark an unbounded Range.Start and
  // Range.End respectively.
  var negInf, posInf = math.Inf(-1), math.Inf(1)
  558. // AtLeast returns a Range matching any value greater than, or equal to, min.
  559. func AtLeast(min float64) Range {
  560. return Range{Start: min, End: posInf}
  561. }
  562. // LessThan returns a Range matching any value less than max.
  563. func LessThan(max float64) Range {
  564. return Range{Start: negInf, End: max}
  565. }
  566. // SortOptions control the ordering and scoring of search results.
  567. type SortOptions struct {
  568. // Expressions is a slice of expressions representing a multi-dimensional
  569. // sort.
  570. Expressions []SortExpression
  571. // Scorer, when specified, will cause the documents to be scored according to
  572. // search term frequency.
  573. Scorer Scorer
  574. // Limit is the maximum number of objects to score and/or sort. Limit cannot
  575. // be more than 10,000. The zero value indicates a default limit.
  576. Limit int
  577. }
  // SortExpression describes one dimension of a document sort.
  type SortExpression struct {
  	// Expr is the expression evaluated to provide a sorting value for each
  	// document.
  	// See https://cloud.google.com/appengine/docs/standard/go/search/options for
  	// the supported expression syntax.
  	Expr string

  	// Reverse causes the documents to be sorted in ascending order.
  	Reverse bool

  	// Default is the value to use when no field is present or the expression
  	// cannot be calculated for a document. For text sorts, Default must be of
  	// type string; for numeric sorts, float64.
  	Default interface{}
  }
  591. // A Scorer defines how a document is scored.
  592. type Scorer interface {
  593. toProto(*pb.ScorerSpec)
  594. }
  595. type enumScorer struct {
  596. enum pb.ScorerSpec_Scorer
  597. }
  598. func (e enumScorer) toProto(spec *pb.ScorerSpec) {
  599. spec.Scorer = e.enum.Enum()
  600. }
  601. var (
  602. // MatchScorer assigns a score based on term frequency in a document.
  603. MatchScorer Scorer = enumScorer{pb.ScorerSpec_MATCH_SCORER}
  604. // RescoringMatchScorer assigns a score based on the quality of the query
  605. // match. It is similar to a MatchScorer but uses a more complex scoring
  606. // algorithm based on match term frequency and other factors like field type.
  607. // Please be aware that this algorithm is continually refined and can change
  608. // over time without notice. This means that the ordering of search results
  609. // that use this scorer can also change without notice.
  610. RescoringMatchScorer Scorer = enumScorer{pb.ScorerSpec_RESCORING_MATCH_SCORER}
  611. )
  612. func sortToProto(sort *SortOptions, params *pb.SearchParams) error {
  613. for _, e := range sort.Expressions {
  614. spec := &pb.SortSpec{
  615. SortExpression: proto.String(e.Expr),
  616. }
  617. if e.Reverse {
  618. spec.SortDescending = proto.Bool(false)
  619. }
  620. if e.Default != nil {
  621. switch d := e.Default.(type) {
  622. case float64:
  623. spec.DefaultValueNumeric = &d
  624. case string:
  625. spec.DefaultValueText = &d
  626. default:
  627. return fmt.Errorf("search: invalid Default type %T for expression %q", d, e.Expr)
  628. }
  629. }
  630. params.SortSpec = append(params.SortSpec, spec)
  631. }
  632. spec := &pb.ScorerSpec{}
  633. if sort.Limit > 0 {
  634. spec.Limit = proto.Int32(int32(sort.Limit))
  635. params.ScorerSpec = spec
  636. }
  637. if sort.Scorer != nil {
  638. sort.Scorer.toProto(spec)
  639. params.ScorerSpec = spec
  640. }
  641. return nil
  642. }
  643. func refinementsToProto(refinements []Facet, params *pb.SearchParams) error {
  644. for _, r := range refinements {
  645. ref := &pb.FacetRefinement{
  646. Name: proto.String(r.Name),
  647. }
  648. switch v := r.Value.(type) {
  649. case Atom:
  650. ref.Value = proto.String(string(v))
  651. case Range:
  652. rng, err := rangeToProto(v)
  653. if err != nil {
  654. return fmt.Errorf("search: refinement for facet %q: %v", r.Name, err)
  655. }
  656. // Unfortunately there are two identical messages for identify Facet ranges.
  657. ref.Range = &pb.FacetRefinement_Range{Start: rng.Start, End: rng.End}
  658. default:
  659. return fmt.Errorf("search: unsupported refinement for facet %q of type %T", r.Name, v)
  660. }
  661. params.FacetRefinement = append(params.FacetRefinement, ref)
  662. }
  663. return nil
  664. }
  665. func rangeToProto(r Range) (*pb.FacetRange, error) {
  666. rng := &pb.FacetRange{}
  667. if r.Start != negInf {
  668. if !validFloat(r.Start) {
  669. return nil, errors.New("invalid value for Start")
  670. }
  671. rng.Start = proto.String(strconv.FormatFloat(r.Start, 'e', -1, 64))
  672. } else if r.End == posInf {
  673. return nil, errors.New("either Start or End must be finite")
  674. }
  675. if r.End != posInf {
  676. if !validFloat(r.End) {
  677. return nil, errors.New("invalid value for End")
  678. }
  679. rng.End = proto.String(strconv.FormatFloat(r.End, 'e', -1, 64))
  680. }
  681. return rng, nil
  682. }
  683. func protoToRange(rng *pb.FacetRefinement_Range) Range {
  684. r := Range{Start: negInf, End: posInf}
  685. if x, err := strconv.ParseFloat(rng.GetStart(), 64); err != nil {
  686. r.Start = x
  687. }
  688. if x, err := strconv.ParseFloat(rng.GetEnd(), 64); err != nil {
  689. r.End = x
  690. }
  691. return r
  692. }
  693. // Iterator is the result of searching an index for a query or listing an
  694. // index.
  695. type Iterator struct {
  696. c context.Context
  697. index *Index
  698. err error
  699. listRes []*pb.Document
  700. listStartID string
  701. listInclusive bool
  702. searchRes []*pb.SearchResult
  703. facetRes []*pb.FacetResult
  704. searchQuery string
  705. searchCursor *string
  706. searchOffset int
  707. sort *SortOptions
  708. fields []string
  709. exprs []FieldExpression
  710. refinements []Facet
  711. facetOpts []FacetSearchOption
  712. more func(*Iterator) error
  713. count int
  714. countAccuracy int
  715. limit int // items left to return; 0 for unlimited.
  716. idsOnly bool
  717. }
  718. // errIter returns an iterator that only returns the given error.
  719. func errIter(err string) *Iterator {
  720. return &Iterator{
  721. err: errors.New(err),
  722. }
  723. }
  // Done is the error returned by an iterator once a query iteration has
  // completed and no results remain.
  var Done = errors.New("search: query has no more results")
  726. // Count returns an approximation of the number of documents matched by the
  727. // query. It is only valid to call for iterators returned by Search.
  728. func (t *Iterator) Count() int { return t.count }
  729. // fetchMore retrieves more results, if there are no errors or pending results.
  730. func (t *Iterator) fetchMore() {
  731. if t.err == nil && len(t.listRes)+len(t.searchRes) == 0 && t.more != nil {
  732. t.err = t.more(t)
  733. }
  734. }
  735. // Next returns the ID of the next result. When there are no more results,
  736. // Done is returned as the error.
  737. //
  738. // dst must be a non-nil struct pointer, implement the FieldLoadSaver
  739. // interface, or be a nil interface value. If a non-nil dst is provided, it
  740. // will be filled with the indexed fields. dst is ignored if this iterator was
  741. // created with an IDsOnly option.
  742. func (t *Iterator) Next(dst interface{}) (string, error) {
  743. t.fetchMore()
  744. if t.err != nil {
  745. return "", t.err
  746. }
  747. var doc *pb.Document
  748. var exprs []*pb.Field
  749. switch {
  750. case len(t.listRes) != 0:
  751. doc = t.listRes[0]
  752. t.listRes = t.listRes[1:]
  753. case len(t.searchRes) != 0:
  754. doc = t.searchRes[0].Document
  755. exprs = t.searchRes[0].Expression
  756. t.searchCursor = t.searchRes[0].Cursor
  757. t.searchRes = t.searchRes[1:]
  758. default:
  759. return "", Done
  760. }
  761. if doc == nil {
  762. return "", errors.New("search: internal error: no document returned")
  763. }
  764. if !t.idsOnly && dst != nil {
  765. if err := loadDoc(dst, doc, exprs); err != nil {
  766. return "", err
  767. }
  768. }
  769. return doc.GetId(), nil
  770. }
  771. // Cursor returns the cursor associated with the current document (that is,
  772. // the document most recently returned by a call to Next).
  773. //
  774. // Passing this cursor in a future call to Search will cause those results
  775. // to commence with the first document after the current document.
  776. func (t *Iterator) Cursor() Cursor {
  777. if t.searchCursor == nil {
  778. return ""
  779. }
  780. return Cursor(*t.searchCursor)
  781. }
  782. // Facets returns the facets found within the search results, if any facets
  783. // were requested in the SearchOptions.
  784. func (t *Iterator) Facets() ([][]FacetResult, error) {
  785. t.fetchMore()
  786. if t.err != nil && t.err != Done {
  787. return nil, t.err
  788. }
  789. var facets [][]FacetResult
  790. for _, f := range t.facetRes {
  791. fres := make([]FacetResult, 0, len(f.Value))
  792. for _, v := range f.Value {
  793. ref := v.Refinement
  794. facet := FacetResult{
  795. Facet: Facet{Name: ref.GetName()},
  796. Count: int(v.GetCount()),
  797. }
  798. if ref.Value != nil {
  799. facet.Value = Atom(*ref.Value)
  800. } else {
  801. facet.Value = protoToRange(ref.Range)
  802. }
  803. fres = append(fres, facet)
  804. }
  805. facets = append(facets, fres)
  806. }
  807. return facets, nil
  808. }
  809. // saveDoc converts from a struct pointer or
  810. // FieldLoadSaver/FieldMetadataLoadSaver to the Document protobuf.
  811. func saveDoc(src interface{}) (*pb.Document, error) {
  812. var err error
  813. var fields []Field
  814. var meta *DocumentMetadata
  815. switch x := src.(type) {
  816. case FieldLoadSaver:
  817. fields, meta, err = x.Save()
  818. default:
  819. fields, meta, err = saveStructWithMeta(src)
  820. }
  821. if err != nil {
  822. return nil, err
  823. }
  824. fieldsProto, err := fieldsToProto(fields)
  825. if err != nil {
  826. return nil, err
  827. }
  828. d := &pb.Document{
  829. Field: fieldsProto,
  830. OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())),
  831. OrderIdSource: pb.Document_DEFAULTED.Enum(),
  832. }
  833. if meta != nil {
  834. if meta.Rank != 0 {
  835. if !validDocRank(meta.Rank) {
  836. return nil, fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank)
  837. }
  838. *d.OrderId = int32(meta.Rank)
  839. d.OrderIdSource = pb.Document_SUPPLIED.Enum()
  840. }
  841. if len(meta.Facets) > 0 {
  842. facets, err := facetsToProto(meta.Facets)
  843. if err != nil {
  844. return nil, err
  845. }
  846. d.Facet = facets
  847. }
  848. }
  849. return d, nil
  850. }
  851. func fieldsToProto(src []Field) ([]*pb.Field, error) {
  852. // Maps to catch duplicate time or numeric fields.
  853. timeFields, numericFields := make(map[string]bool), make(map[string]bool)
  854. dst := make([]*pb.Field, 0, len(src))
  855. for _, f := range src {
  856. if !validFieldName(f.Name) {
  857. return nil, fmt.Errorf("search: invalid field name %q", f.Name)
  858. }
  859. fieldValue := &pb.FieldValue{}
  860. switch x := f.Value.(type) {
  861. case string:
  862. fieldValue.Type = pb.FieldValue_TEXT.Enum()
  863. fieldValue.StringValue = proto.String(x)
  864. case Atom:
  865. fieldValue.Type = pb.FieldValue_ATOM.Enum()
  866. fieldValue.StringValue = proto.String(string(x))
  867. case HTML:
  868. fieldValue.Type = pb.FieldValue_HTML.Enum()
  869. fieldValue.StringValue = proto.String(string(x))
  870. case time.Time:
  871. if timeFields[f.Name] {
  872. return nil, fmt.Errorf("search: duplicate time field %q", f.Name)
  873. }
  874. timeFields[f.Name] = true
  875. fieldValue.Type = pb.FieldValue_DATE.Enum()
  876. fieldValue.StringValue = proto.String(strconv.FormatInt(x.UnixNano()/1e6, 10))
  877. case float64:
  878. if numericFields[f.Name] {
  879. return nil, fmt.Errorf("search: duplicate numeric field %q", f.Name)
  880. }
  881. if !validFloat(x) {
  882. return nil, fmt.Errorf("search: numeric field %q with invalid value %f", f.Name, x)
  883. }
  884. numericFields[f.Name] = true
  885. fieldValue.Type = pb.FieldValue_NUMBER.Enum()
  886. fieldValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
  887. case appengine.GeoPoint:
  888. if !x.Valid() {
  889. return nil, fmt.Errorf(
  890. "search: GeoPoint field %q with invalid value %v",
  891. f.Name, x)
  892. }
  893. fieldValue.Type = pb.FieldValue_GEO.Enum()
  894. fieldValue.Geo = &pb.FieldValue_Geo{
  895. Lat: proto.Float64(x.Lat),
  896. Lng: proto.Float64(x.Lng),
  897. }
  898. default:
  899. return nil, fmt.Errorf("search: unsupported field type: %v", reflect.TypeOf(f.Value))
  900. }
  901. if f.Language != "" {
  902. switch f.Value.(type) {
  903. case string, HTML:
  904. if !validLanguage(f.Language) {
  905. return nil, fmt.Errorf("search: invalid language for field %q: %q", f.Name, f.Language)
  906. }
  907. fieldValue.Language = proto.String(f.Language)
  908. default:
  909. return nil, fmt.Errorf("search: setting language not supported for field %q of type %T", f.Name, f.Value)
  910. }
  911. }
  912. if p := fieldValue.StringValue; p != nil && !utf8.ValidString(*p) {
  913. return nil, fmt.Errorf("search: %q field is invalid UTF-8: %q", f.Name, *p)
  914. }
  915. dst = append(dst, &pb.Field{
  916. Name: proto.String(f.Name),
  917. Value: fieldValue,
  918. })
  919. }
  920. return dst, nil
  921. }
  922. func facetsToProto(src []Facet) ([]*pb.Facet, error) {
  923. dst := make([]*pb.Facet, 0, len(src))
  924. for _, f := range src {
  925. if !validFieldName(f.Name) {
  926. return nil, fmt.Errorf("search: invalid facet name %q", f.Name)
  927. }
  928. facetValue := &pb.FacetValue{}
  929. switch x := f.Value.(type) {
  930. case Atom:
  931. if !utf8.ValidString(string(x)) {
  932. return nil, fmt.Errorf("search: %q facet is invalid UTF-8: %q", f.Name, x)
  933. }
  934. facetValue.Type = pb.FacetValue_ATOM.Enum()
  935. facetValue.StringValue = proto.String(string(x))
  936. case float64:
  937. if !validFloat(x) {
  938. return nil, fmt.Errorf("search: numeric facet %q with invalid value %f", f.Name, x)
  939. }
  940. facetValue.Type = pb.FacetValue_NUMBER.Enum()
  941. facetValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
  942. default:
  943. return nil, fmt.Errorf("search: unsupported facet type: %v", reflect.TypeOf(f.Value))
  944. }
  945. dst = append(dst, &pb.Facet{
  946. Name: proto.String(f.Name),
  947. Value: facetValue,
  948. })
  949. }
  950. return dst, nil
  951. }
  952. // loadDoc converts from protobufs to a struct pointer or
  953. // FieldLoadSaver/FieldMetadataLoadSaver. The src param provides the document's
  954. // stored fields and facets, and any document metadata. An additional slice of
  955. // fields, exprs, may optionally be provided to contain any derived expressions
  956. // requested by the developer.
  957. func loadDoc(dst interface{}, src *pb.Document, exprs []*pb.Field) (err error) {
  958. fields, err := protoToFields(src.Field)
  959. if err != nil {
  960. return err
  961. }
  962. facets, err := protoToFacets(src.Facet)
  963. if err != nil {
  964. return err
  965. }
  966. if len(exprs) > 0 {
  967. exprFields, err := protoToFields(exprs)
  968. if err != nil {
  969. return err
  970. }
  971. // Mark each field as derived.
  972. for i := range exprFields {
  973. exprFields[i].Derived = true
  974. }
  975. fields = append(fields, exprFields...)
  976. }
  977. meta := &DocumentMetadata{
  978. Rank: int(src.GetOrderId()),
  979. Facets: facets,
  980. }
  981. switch x := dst.(type) {
  982. case FieldLoadSaver:
  983. return x.Load(fields, meta)
  984. default:
  985. return loadStructWithMeta(dst, fields, meta)
  986. }
  987. }
  988. func protoToFields(fields []*pb.Field) ([]Field, error) {
  989. dst := make([]Field, 0, len(fields))
  990. for _, field := range fields {
  991. fieldValue := field.GetValue()
  992. f := Field{
  993. Name: field.GetName(),
  994. }
  995. switch fieldValue.GetType() {
  996. case pb.FieldValue_TEXT:
  997. f.Value = fieldValue.GetStringValue()
  998. f.Language = fieldValue.GetLanguage()
  999. case pb.FieldValue_ATOM:
  1000. f.Value = Atom(fieldValue.GetStringValue())
  1001. case pb.FieldValue_HTML:
  1002. f.Value = HTML(fieldValue.GetStringValue())
  1003. f.Language = fieldValue.GetLanguage()
  1004. case pb.FieldValue_DATE:
  1005. sv := fieldValue.GetStringValue()
  1006. millis, err := strconv.ParseInt(sv, 10, 64)
  1007. if err != nil {
  1008. return nil, fmt.Errorf("search: internal error: bad time.Time encoding %q: %v", sv, err)
  1009. }
  1010. f.Value = time.Unix(0, millis*1e6)
  1011. case pb.FieldValue_NUMBER:
  1012. sv := fieldValue.GetStringValue()
  1013. x, err := strconv.ParseFloat(sv, 64)
  1014. if err != nil {
  1015. return nil, err
  1016. }
  1017. f.Value = x
  1018. case pb.FieldValue_GEO:
  1019. geoValue := fieldValue.GetGeo()
  1020. geoPoint := appengine.GeoPoint{geoValue.GetLat(), geoValue.GetLng()}
  1021. if !geoPoint.Valid() {
  1022. return nil, fmt.Errorf("search: internal error: invalid GeoPoint encoding: %v", geoPoint)
  1023. }
  1024. f.Value = geoPoint
  1025. default:
  1026. return nil, fmt.Errorf("search: internal error: unknown data type %s", fieldValue.GetType())
  1027. }
  1028. dst = append(dst, f)
  1029. }
  1030. return dst, nil
  1031. }
  1032. func protoToFacets(facets []*pb.Facet) ([]Facet, error) {
  1033. if len(facets) == 0 {
  1034. return nil, nil
  1035. }
  1036. dst := make([]Facet, 0, len(facets))
  1037. for _, facet := range facets {
  1038. facetValue := facet.GetValue()
  1039. f := Facet{
  1040. Name: facet.GetName(),
  1041. }
  1042. switch facetValue.GetType() {
  1043. case pb.FacetValue_ATOM:
  1044. f.Value = Atom(facetValue.GetStringValue())
  1045. case pb.FacetValue_NUMBER:
  1046. sv := facetValue.GetStringValue()
  1047. x, err := strconv.ParseFloat(sv, 64)
  1048. if err != nil {
  1049. return nil, err
  1050. }
  1051. f.Value = x
  1052. default:
  1053. return nil, fmt.Errorf("search: internal error: unknown data type %s", facetValue.GetType())
  1054. }
  1055. dst = append(dst, f)
  1056. }
  1057. return dst, nil
  1058. }
  1059. func namespaceMod(m proto.Message, namespace string) {
  1060. set := func(s **string) {
  1061. if *s == nil {
  1062. *s = &namespace
  1063. }
  1064. }
  1065. switch m := m.(type) {
  1066. case *pb.IndexDocumentRequest:
  1067. set(&m.Params.IndexSpec.Namespace)
  1068. case *pb.ListDocumentsRequest:
  1069. set(&m.Params.IndexSpec.Namespace)
  1070. case *pb.DeleteDocumentRequest:
  1071. set(&m.Params.IndexSpec.Namespace)
  1072. case *pb.SearchRequest:
  1073. set(&m.Params.IndexSpec.Namespace)
  1074. }
  1075. }
  1076. func init() {
  1077. internal.RegisterErrorCodeMap("search", pb.SearchServiceError_ErrorCode_name)
  1078. internal.NamespaceMods["search"] = namespaceMod
  1079. }