##
#### Default configuration file for the Portuguese analyzer
##
## One directive per line; lines starting with '#' are comments.

#### General options
Lang=pt

#### Trace options. Only effective if we have compiled with -DVERBOSE
#
## Possible values for TraceModule (may be OR'ed)
#define SPLIT_TRACE 0x00000001
#define TOKEN_TRACE 0x00000002
#define MACO_TRACE 0x00000004
#define OPTIONS_TRACE 0x00000008
#define NUMBERS_TRACE 0x00000010
#define DATES_TRACE 0x00000020
#define PUNCT_TRACE 0x00000040
#define DICT_TRACE 0x00000080
#define SUFF_TRACE 0x00000100
#define LOCUT_TRACE 0x00000200
#define NP_TRACE 0x00000400
#define PROB_TRACE 0x00000800
#define QUANT_TRACE 0x00001000
#define NEC_TRACE 0x00002000
#define AUTOMAT_TRACE 0x00004000
#define TAGGER_TRACE 0x00008000
#define HMM_TRACE 0x00010000
#define RELAX_TRACE 0x00020000
#define RELAX_TAGGER_TRACE 0x00040000
#define CONST_GRAMMAR_TRACE 0x00080000
#define SENSES_TRACE 0x00100000
#define CHART_TRACE 0x00200000
#define GRAMMAR_TRACE 0x00400000
#define DEP_TRACE 0x00800000
#define UTIL_TRACE 0x01000000

TraceLevel=3
# No module bits from the table above are set in this mask.
TraceModule=0x0000

## Options to control the applied modules. The input may be partially
## processed, or a full analysis may not be wanted. The specific
## formats are a choice of the main program using the library, as is
## the responsibility of calling only the required modules.
## Valid input/output formats are: plain, token, splitted, morfo, tagged, parsed
InputFormat=plain
OutputFormat=tagged

# consider each newline as a sentence end
AlwaysFlush=no

#### Tokenizer options
TokenizerFile="$FREELINGSHARE/pt/tokenizer.dat"

#### Splitter options
SplitterFile="$FREELINGSHARE/pt/splitter.dat"

#### Morfo options
AffixAnalysis=yes
MultiwordsDetection=yes
NumbersDetection=yes
PunctuationDetection=yes
DatesDetection=yes
QuantitiesDetection=yes
DictionarySearch=yes
ProbabilityAssignment=yes
OrthographicCorrection=no
DecimalPoint=","
ThousandPoint="."
# Data files and threshold; all but PunctuationFile live under $FREELINGSHARE/pt.
LocutionsFile=$FREELINGSHARE/pt/locucions.dat
QuantitiesFile=$FREELINGSHARE/pt/quantities.dat
AffixFile=$FREELINGSHARE/pt/afixos.dat
ProbabilityFile=$FREELINGSHARE/pt/probabilitats.dat
DictionaryFile=$FREELINGSHARE/pt/maco.db
PunctuationFile=$FREELINGSHARE/common/punct.dat
ProbabilityThreshold=0.001

#### NER options
NERecognition=basic
NPDataFile=$FREELINGSHARE/pt/np.dat
## --- comment out the two lines above and uncomment those below, if you want
## --- a better NE recognizer (higher accuracy, lower speed)
#NERecognition=bio
#NPDataFile=$FREELINGSHARE/pt/ner/ner.dat

#### Spelling corrector config file
CorrectorFile=$FREELINGSHARE/pt/corrector/corrector.dat

#### NEC options
NEClassification=no
NECFilePrefix=$FREELINGSHARE/pt/nec/nec

#### Sense annotation options (none,all,mfs)
SenseAnnotation=none
SenseFile=$FREELINGSHARE/pt/senses30.db
# NOTE(review): written as "false" while other switches in this file use
# yes/no -- TODO confirm the parser accepts both spellings.
DuplicateAnalysis=false
UKBRelations=$FREELINGSHARE/common/xwn30-ukb.bin
UKBDictionary=$FREELINGSHARE/pt/senses30.ukb
UKBEpsilon=0.03
UKBMaxIter=10

#### Tagger options
Tagger=hmm
TaggerHMMFile=$FREELINGSHARE/pt/tagger.dat
TaggerRelaxFile=$FREELINGSHARE/pt/constr_gram.dat
TaggerRelaxMaxIter=500
TaggerRelaxScaleFactor=670.0
TaggerRelaxEpsilon=0.001
TaggerRetokenize=yes
TaggerForceSelect=retok

#### Parser options
# NOTE(review): this is the only data path that does not start with
# $FREELINGSHARE; "EXP_PT" looks like a relative directory or an unexpanded
# placeholder -- confirm it resolves where the analyzer is run.
GrammarFile=EXP_PT/grammar-dep-pt.dat

#### Dependency parser options
DepTxalaFile=$FREELINGSHARE/pt/dep/dependences.dat

#### Coreference solver options
CoreferenceResolution=no
CorefFile=$FREELINGSHARE/pt/coref/coref.dat