2013-04-11 3 views
1

Мы оцениваем Solr как решение для поиска и столкнулись с проблемой, связанной с оценками релевантности (score). Мы выполнили сканирование и индексирование, но все оценки равны 0.0, хотя, насколько нам известно, мы не меняли никаких настроек, связанных с ранжированием.

Solr: оценка в результатах поиска всегда равна 0.0

Было бы здорово, если бы кто-нибудь смог воспроизвести проблему и подсказать нам её причину. Это бы нам очень помогло.

Версия Solr: 4.2

+0

Пожалуйста, приложите вашу схему, запрос и вывод debugQuery. – phisch

+0

Попробуйте задать буст (boost) для документов. Это повлияет на оценку. http://wiki.apache.org/solr/SolrRelevancyFAQ#How_can_I_change_the_score_of_a_document_based_on_the_.2Avalue.2A_of_a_field_.28say.2C_.22popularity.22.29 – Max

+0

Пожалуйста, см. приложенные файлы конфигурации. – user2244070

ответ

0
<schema name="nutch" version="1.5"> 
    <types> 
    <!-- Stemmed English text type. The index analyzer catenates word and number
         parts; the query analyzer expands synonyms and does not catenate. -->
    <fieldType name="text"
               class="solr.TextField"
               positionIncrementGap="100"
               autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Terms listed in protwords.txt are marked as keywords ahead of the stemmer. -->
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <!-- NOTE(review): duplicate removal runs before stemming here, so duplicates
             produced by the stemmer are presumably kept; confirm this is intended. -->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="English"/>
      </analyzer>
    </fieldType>
     <!-- Primitive, non-analyzed types. Every type below except "binary" omits norms.
          The plain numeric/date types use precisionStep="0"; the t-prefixed variants
          use a non-zero precisionStep (8, or 6 for tdate). -->
     <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
     <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
     <!-- boolean type: "true" or "false" -->
     <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
     <!-- Binary data type. The data should be sent/retrieved as Base64 encoded strings.
          Declared with the same camel-case element name as every other type
          (the original used the lower-case "fieldtype" spelling). -->
     <fieldType name="binary" class="solr.BinaryField"/>
     <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
     <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

    <!-- General-purpose text type without stemming; a reasonable choice when the
         language of the field is not known. -->
    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>


    <!-- Type behind the random_* dynamic field. -->
    <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

    <!-- Date type with precisionStep="0" and norms omitted. -->
    <fieldType name="date"
               class="solr.TrieDateField"
               precisionStep="0"
               omitNorms="true"
               positionIncrementGap="0"/>
<!-- Type of the catch-all "text" field (the default search field).
     Same filter chain as the "text" type plus ASCII folding of the input.
     The charFilter is declared first in each analyzer because char filters
     operate on the raw character stream before the tokenizer runs; declaring
     it between token filters suggested a stage at which it never executes. -->
<fieldType name="fullText"
           class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
  </analyzer>
</fieldType>

    <!-- Type of the "spell" field: standard tokenization, stop words and
         lower-casing, with no stemming. -->
    <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Keeps the whole field value as one token and lower-cases it;
         this is the type of the titleSort field. -->
    <fieldType name="lowercase"
               class="solr.TextField"
               positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Type of the host and url fields: standard tokenization, lower-casing,
         then splitting into word and number parts. One analyzer serves both
         indexing and querying. -->
    <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"/>
      </analyzer>
    </fieldType>
    </types> 
    <fields> 
     <!-- Unique key of the document (see uniqueKey). -->
     <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>

     <!-- Spellcheck field; filled through copyField rules. -->
     <field name="spell" type="textSpell" indexed="true" stored="false" required="false" multiValued="true"/>

     <!-- Sorting field; filled from title through a copyField rule. -->
     <field name="titleSort" type="lowercase" indexed="true" stored="false" required="false"/>

     <!-- core fields -->
     <field name="segment" type="string" indexed="false" stored="true"/>
     <field name="digest" type="string" indexed="false" stored="true"/>
     <field name="boost" type="float" indexed="false" stored="true"/>

     <!-- fields for index-basic plugin -->
     <field name="host" type="url" indexed="true" stored="false"/>
     <field name="site" type="string" indexed="true" stored="false"/>
     <field name="url" type="url" indexed="true" stored="true"/>
     <field name="content" type="text" indexed="true" stored="true"/>
     <field name="title" type="text" indexed="true" stored="true"/>
     <field name="cache" type="string" indexed="false" stored="true"/>
     <field name="tstamp" type="date" indexed="true" stored="true"/>
     <field name="_version_" type="long" indexed="true" stored="true"/>

     <!-- fields for index-anchor plugin -->
     <field name="anchor" type="string" indexed="true" stored="true" multiValued="true"/>

     <!-- fields for index-more plugin -->
     <field name="type" type="string" indexed="true" stored="true" multiValued="true"/>
     <field name="contentLength" type="long" indexed="false" stored="true"/>
     <field name="lastModified" type="date" indexed="false" stored="true"/>
     <field name="date" type="date" indexed="true" stored="true"/>

     <!-- fields for languageidentifier plugin -->
     <field name="lang" type="string" indexed="true" stored="true"/>

     <!-- KS -->
     <field name="metatag.description" type="text" indexed="true" stored="true"/>
     <field name="metatag.keywords" type="text" indexed="true" stored="true"/>

     <!-- fields for subcollection plugin -->
     <field name="subcollection" type="string" indexed="true" stored="true" multiValued="true"/>

     <!-- fields for feed plugin (tag is also used by microformats-reltag) -->
     <field name="author" type="string" indexed="true" stored="true"/>
     <field name="tag" type="string" indexed="true" stored="true" multiValued="true"/>
     <field name="feed" type="string" indexed="true" stored="true"/>
     <field name="publishedDate" type="date" indexed="true" stored="true"/>
     <field name="updatedDate" type="date" indexed="true" stored="true"/>

     <!-- fields for creativecommons plugin -->
     <field name="cc" type="string" indexed="true" stored="true" multiValued="true"/>

     <!-- Catch-all field added for the default search configuration; indexed only,
          filled through copyField rules. -->
     <field name="text" type="fullText" indexed="true" stored="false" required="false" multiValued="true"/>

<!-- For Extract Handler --> 

     <!-- A copyField rule copies one field into another when a document is added
          to the index. It serves to index the same content in a different way, or
          to gather several fields into one for easier/faster searching. -->

     <!-- Sources of the catch-all "text" field -->
     <copyField source="anchor" dest="text"/>
     <copyField source="title" dest="text"/>
     <copyField source="content" dest="text"/>
     <!-- Disabled:
     <copyField source="description" dest="text"/>
     <copyField source="keywords" dest="text"/>
     -->

     <!-- Sources of the spellcheck field -->
     <copyField source="anchor" dest="spell"/>
     <copyField source="title" dest="spell"/>
     <copyField source="content" dest="spell"/>

     <!-- Source of the sorting field -->
     <copyField source="title" dest="titleSort"/>

<!-- Dynamic field definitions. When a field name is not declared explicitly,
     the first matching pattern below supplies its type.
     RESTRICTION: the glob-like pattern in the name attribute may carry a "*"
     only at its start or its end.
     EXAMPLE: name="*_i" matches any field ending in _i (like myid_i, z_i).
     Longer patterns are matched first; when two patterns of equal size both
     match, the one appearing first in the schema is used. -->
    <dynamicField name="*_i"  type="int"     indexed="true" stored="true"/>
    <dynamicField name="*_s"  type="string"  indexed="true" stored="true"/>
    <dynamicField name="*_l"  type="long"    indexed="true" stored="true"/>
    <dynamicField name="*_t"  type="text"    indexed="true" stored="true"/>
    <dynamicField name="*_b"  type="boolean" indexed="true" stored="true"/>
    <dynamicField name="*_f"  type="float"   indexed="true" stored="true"/>
    <dynamicField name="*_d"  type="double"  indexed="true" stored="true"/>
    <dynamicField name="*_dt" type="date"    indexed="true" stored="true"/>

    <!-- trie-coded variants for faster range queries -->
    <dynamicField name="*_ti"  type="tint"    indexed="true" stored="true"/>
    <dynamicField name="*_tl"  type="tlong"   indexed="true" stored="true"/>
    <dynamicField name="*_tf"  type="tfloat"  indexed="true" stored="true"/>
    <dynamicField name="*_td"  type="tdouble" indexed="true" stored="true"/>
    <dynamicField name="*_tdt" type="tdate"   indexed="true" stored="true"/>

    <!-- multi-valued unstemmed text attributes -->
    <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>

    <dynamicField name="random_*" type="random"/>



    </fields> 
    <!-- Field that uniquely identifies a document. -->
    <uniqueKey>id</uniqueKey>

    <!-- Queries without an explicit field go to the catch-all "text" field, and
         their terms are combined with OR.
         NOTE(review): both settings are, as far as I know, deprecated since Solr 3.6
         in favour of the df and q.op request parameters; confirm before upgrading. -->
    <defaultSearchField>text</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>

    <!-- Scoring model applied to the whole schema. -->
    <similarity class="org.apache.lucene.search.similarities.DefaultSimilarity"/>
    </schema> 
0
Query : select?q=Menu&wt=xml&indent=true&debug=true&fl=*,score 

This XML file does not appear to have any style information associated with it. The document tree is shown below. 

− 
<response> 
− 
<lst name="responseHeader"> 
<int name="status">0</int> 
<int name="QTime">3</int> 
− 
<lst name="params"> 
<str name="fl">id,title,score</str> 
<str name="indent">true</str> 
<str name="q">Menu</str> 
<str name="debug">true</str> 
<str name="wt">xml</str> 
</lst> 
</lst> 
− 
<result name="response" numFound="6" start="0" maxScore="0.0"> 
− 
<doc> 
<str name="title">English :: McDonalds.ca</str> 
<str name="id">http://www.mcdonalds.ca/</str> 
<float name="score">0.0</float> 
</doc> 
− 
<doc> 
<str name="title">Corporate Opportunities :: McDonalds.ca</str> 
− 
<str name="id"> 
http://www.mcdonalds.ca/ca/en/careers/corp_opp/corporate_opportunities.html 
</str> 
<float name="score">0.0</float> 
</doc> 
− 
<doc> 
<str name="title">People :: McDonalds.ca</str> 
− 
<str name="id"> 
http://www.mcdonalds.ca/ca/en/careers/rest_opp/people.html 
</str> 
<float name="score">0.0</float> 
</doc> 
− 
<doc> 
<str name="title">Training, Education and Benefits :: McDonalds.ca</str> 
− 
<str name="id"> 
http://www.mcdonalds.ca/ca/en/careers/rest_opp/training_education_and_benefits.html 
</str> 
<float name="score">0.0</float> 
</doc> 
− 
<doc> 
<str name="title">Working Here :: McDonalds.ca</str> 
− 
<str name="id"> 
http://www.mcdonalds.ca/ca/en/careers/rest_opp/working_here.html 
</str> 
<float name="score">0.0</float> 
</doc> 
− 
<doc> 
<str name="title">Environment :: McDonalds.ca</str> 
− 
<str name="id"> 
http://www.mcdonalds.ca/ca/en/communities/environment.html 
</str> 
<float name="score">0.0</float> 
</doc> 
</result> 
− 
<lst name="spellcheck"> 
<lst name="suggestions"/> 
</lst> 
− 
<lst name="debug"> 
− 
<lst name="queryBoosting"> 
<str name="q">menu</str> 
<null name="match"/> 
</lst> 
<str name="rawquerystring">Menu</str> 
<str name="querystring">Menu</str> 
<str name="parsedquery">text:menu</str> 
<str name="parsedquery_toString">text:menu</str> 
− 
<lst name="explain"> 
− 
<str name="http://www.mcdonalds.ca/"> 

0.0 = (MATCH) weight(text:menu in 0) [DefaultSimilarity], result of: 
    0.0 = fieldWeight in 0, product of: 
    1.4142135 = tf(freq=2.0), with freq of: 
     2.0 = termFreq=2.0 
    0.84584934 = idf(docFreq=6, maxDocs=6) 
    0.0 = fieldNorm(doc=0) 
</str> 
− 
<str name="http://www.mcdonalds.ca/ca/en/careers/corp_opp/corporate_opportunities.html"> 

0.0 = (MATCH) weight(text:menu in 1) [DefaultSimilarity], result of: 
    0.0 = fieldWeight in 1, product of: 
    1.0 = tf(freq=1.0), with freq of: 
     1.0 = termFreq=1.0 
    0.84584934 = idf(docFreq=6, maxDocs=6) 
    0.0 = fieldNorm(doc=1) 
</str> 
− 
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/people.html"> 

0.0 = (MATCH) weight(text:menu in 2) [DefaultSimilarity], result of: 
    0.0 = fieldWeight in 2, product of: 
    1.0 = tf(freq=1.0), with freq of: 
     1.0 = termFreq=1.0 
    0.84584934 = idf(docFreq=6, maxDocs=6) 
    0.0 = fieldNorm(doc=2) 
</str> 
− 
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/training_education_and_benefits.html"> 

0.0 = (MATCH) weight(text:menu in 3) [DefaultSimilarity], result of: 
    0.0 = fieldWeight in 3, product of: 
    1.0 = tf(freq=1.0), with freq of: 
     1.0 = termFreq=1.0 
    0.84584934 = idf(docFreq=6, maxDocs=6) 
    0.0 = fieldNorm(doc=3) 
</str> 
− 
<str name="http://www.mcdonalds.ca/ca/en/careers/rest_opp/working_here.html"> 

0.0 = (MATCH) weight(text:menu in 4) [DefaultSimilarity], result of: 
    0.0 = fieldWeight in 4, product of: 
    1.0 = tf(freq=1.0), with freq of: 
     1.0 = termFreq=1.0 
    0.84584934 = idf(docFreq=6, maxDocs=6) 
    0.0 = fieldNorm(doc=4) 
</str> 
− 
<str name="http://www.mcdonalds.ca/ca/en/communities/environment.html"> 

0.0 = (MATCH) weight(text:menu in 5) [DefaultSimilarity], result of: 
    0.0 = fieldWeight in 5, product of: 
    1.0 = tf(freq=1.0), with freq of: 
     1.0 = termFreq=1.0 
    0.84584934 = idf(docFreq=6, maxDocs=6) 
    0.0 = fieldNorm(doc=5) 
</str> 
</lst> 
<str name="QParser">LuceneQParser</str> 
− 
<lst name="timing"> 
<double name="time">3.0</double> 
− 
<lst name="prepare"> 
<double name="time">1.0</double> 
− 
<lst name="query"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="facet"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="mlt"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="highlight"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="stats"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="spellcheck"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="elevator"> 
<double name="time">1.0</double> 
</lst> 
− 
<lst name="debug"> 
<double name="time">0.0</double> 
</lst> 
</lst> 
− 
<lst name="process"> 
<double name="time">2.0</double> 
− 
<lst name="query"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="facet"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="mlt"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="highlight"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="stats"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="spellcheck"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="elevator"> 
<double name="time">0.0</double> 
</lst> 
− 
<lst name="debug"> 
<double name="time">2.0</double> 
</lst> 
</lst> 
</lst> 
</lst> 
</response> 
0

В моём URL-фильтре был настроен домен www.mcdonalds.ca. Поскольку он перенаправлял на www.mcdonalds.ca/ca/en.html, оценка не вычислялась ни для одного из URL.

Я изменил начальный (seed) URL: вместо www.mcdonalds.ca указал www.mcdonalds.ca/ca/en.html, и это сработало.

Пожалуйста, перейдите по следующей ссылке для получения более подробной информации -

http://grokbase.com/t/nutch/user/117cpvp70q/a-possible-solution-to-my-url-redirection-and-zero-scores-problem

Смежные вопросы