% Macro: the university name, for use as an unquoted field value.
@string{um = {Universidade do Minho}}
% Macro: the university's Informatics Department; used as `institution` by the reports below.
@string{umdi = {Universidade do Minho, Departamento de Informática}}

% Journal article (1987): description of a graphics kernel (KGUM) and its application to CAD.
@article{velharia1,
  author  = {J.J. Almeida and A. Filipe},
  title   = {Descrição de um Núcleo Gráfico e Aplicação em {CAD}},
  journal = {Revista de Informática},
  volume  = {6},
  number  = {1},
  year    = {1987},
  note    = {(KGUM - kernel gráfico U.Minho)},
}

% Journal article (1988) on modular programming systems.
@article{velharia2,
  author  = {C. Ferreira and F. Ferreira and F. Martins and J.J. Almeida and L. Barbosa},
  title   = {Sistemas de Programação Modular},
  journal = {Revista de Informática},
  volume  = {9},
  number  = {6},
  year    = {1988},
}

% Conference paper (1990) on specifying and prototyping user-system interfaces.
% The ordinal indicator in booktitle is plain text: it is a text-mode glyph and
% must not be wrapped in math mode.
@inproceedings{graminteractivas1990,
  author    = {F. Mário Martins and J.J. Almeida and P.R. Henriques},
  title     = {Mecanismos para Especificação e Prototipagem de Interfaces
               Utilizador-Sistema},
  booktitle = {3º Encontro Português de Computação Gráfica},
  address   = {Coimbra},
  year      = {1990},
  note      = {(Gramáticas Interactivas guardadas)},
}

% Teaching text on language theory (didactic report, 1988).
@techreport{tlc89,
  author      = {J.J. Almeida and J.B. Barros},
  title       = {Teoria das Linguagens},
  institution = umdi,
  type        = {Texto didáctico},
  year        = {1988},
  keyword     = {compiladores},
}

% Teaching text on data structures (didactic report, 1990).
@techreport{estruturasdedados90,
  author      = {J.B. Barros and J.J. Almeida},
  title       = {Estruturas de Dados},
  institution = umdi,
  type        = {Texto didáctico},
  year        = {1990},
  keyword     = {algoritmos},
}

% Project web pages for Camila, catalogued as a report; the author is a
% corporate name, double-braced so it is kept as one unit.
@techreport{Camila,
  author      = {{projecto Camila}},
  editor      = {L.S. Barbosa and J.J. Almeida and J.N. Oliveira and Luís Neves},
  title       = {\textsc{Camila} - A Platform for Software Mathematical Development},
  institution = umdi,
  type        = {(Páginas do projecto)},
  year        = {1998},
  url         = {http://camila.di.uminho.pt},
  keyword     = {FS},
}

% Project web pages for Natura (natural language processing), catalogued as a report.
% The address appears both in `url` and, wrapped in \url, in `note`.
@techreport{Natura,
  author      = {J.J. Almeida},
  title       = {{Natura} - Natural language processing},
  institution = umdi,
  type        = {(Páginas do projecto)},
  year        = {1997},
  note        = {\url{http://natura.di.uminho.pt/}},
  url         = {http://natura.di.uminho.pt/},
  keyword     = {PLN},
}

% Project web pages for David (structured document processing), catalogued as a
% report with a corporate author. The address appears in both `url` and `note`.
@techreport{PDavid,
  author      = {{projecto David}},
  editor      = {J.C. Ramalho and J.J. Almeida and P.R. Henriques},
  title       = {David -- Processamento estruturado de documentos},
  institution = umdi,
  type        = {(Páginas do projecto)},
  year        = {1998},
  note        = {\url{http://www.di.uminho.pt/~jcr/projectos/david/princ.html}},
  url         = {http://www.di.uminho.pt/~jcr/projectos/david/princ.html},
  keyword     = {FS,SGML},
}

% Software tool entry for NLlex. The tool name and the LEX acronym are
% brace-protected so sentence-casing styles keep their capitalisation.
@misc{nllex,
  author  = {J.J. Almeida},
  title   = {{NLlex} -- Natural Language {LEX}},
  year    = {1996},
  type    = {tool},
  url     = {http://natura.di.uminho.pt/~jj/pln/pln.html#nllex},
  keyword = {lexical analysis, Natura,lex},
}

% Software tool entry for Jspell, a morphological analyser module.
@misc{jspell,
  author  = {J.J. Almeida and Ulisses Pinto},
  title   = {Jspell a module for morphological analyser for natural language},
  year    = {1997},
  type    = {tool},
  url     = {http://natura.di.uminho.pt/~jj/pln/pln.html#jspell},
  keyword = {lexical analysis, Natura,morphology},
}

% User manual for JSpell, issued as a report. The month uses the standard macro
% so the bibliography style decides how it is printed.
@techreport{jspell1,
  author      = {J.J. Almeida and Ulisses Pinto},
  title       = {Manual de Utilizador do {JSpell}},
  institution = umdi,
  type        = {Manual},
  month       = jul,
  year        = {1994},
  url         = {http://natura.di.uminho.pt/~jj/pln/jspellman.ps.gz},
  keyword     = {morphology, lexical analysis,jspell},
}

% Conference paper (1994) on GPC, a tool for higher-order grammar specification.
% The editor is given in "Last, First" form so the two-word surname is kept whole.
@inproceedings{Almeida94b,
  author    = {J.J. Almeida},
  title     = {{GPC} -- a Tool for higher-order grammar specification},
  booktitle = {Actas del X Congreso de Lenguajes Naturales y Lenguajes Formales, Sevilla},
  editor    = {Martín Vide, Carlos},
  year      = {1994},
  url       = {http://natura.di.uminho.pt/~jj/pln/yalg3.ps.gz},
  keyword   = {DCG,grammar},
}

% Conference paper (1995) on YaLG, an extension of DCG for natural language processing.
% The editor is given in "Last, First" form so the two-word surname is kept whole.
@inproceedings{Almeida95a,
  author    = {J.J. Almeida},
  title     = {{YaLG} -- extending {DCG} for natural language processing},
  booktitle = {Actas del XI Congreso de Lenguajes Naturales y Lenguajes Formales, Tortosa},
  editor    = {Martín Vide, Carlos},
  pages     = {621--628},
  year      = {1995},
  url       = {http://natura.di.uminho.pt/~jj/pln/yalg.ps.gz},
  keyword   = {jspell,morphology,PLN,DCG,nllex},
}

% Conference paper on Jspell as a generic lexical analysis module; `address`
% carries the meeting place and year, `year` the publication year.
@inproceedings{Almeida94c,
  author    = {J.J. Almeida and Ulisses Pinto},
  title     = {Jspell -- um módulo para análise léxica genérica de linguagem natural},
  booktitle = {Actas do X Encontro da Associação Portuguesa de Linguística},
  pages     = {1--15},
  address   = {Évora 1994},
  year      = {1995},
  url       = {http://natura.di.uminho.pt/~jj/pln/jspell1.ps.gz},
  keyword   = {jspell,morphology,PLN,perl},
}

% Conference paper (1994) presented at a national congress of librarians,
% archivists and documentalists.
@inproceedings{Almeida94a,
  author    = {J.J. Almeida},
  title     = {Documents in an Informatic Academic environment},
  booktitle = {Congresso Nacional de Bibliotecários, Arquivistas e Documentalistas},
  address   = {Lisboa},
  year      = {1994},
  keyword   = {librarian studies,WWW,WAIS,IR},
}

% Technical report UM-DI-95.04 on NLlex; the article keyed jj96 carries the same title.
@techreport{jj95,
  author      = {J.J. Almeida},
  title       = {{NLlex} -- a tool to generate lexical analysers for natural language},
  institution = umdi,
  number      = {UM-DI-95.04},
  year        = {1995},
  url         = {http://natura.di.uminho.pt/~jj/pln/nllex.ps.gz},
  keyword     = {jspell,morphology,lex,PLN,nllex},
}

% Technical report DI-CAM-95:11:1: lecture notes on system prototyping in Camila.
@techreport{Barbosa95,
  author      = {L.S. Barbosa and J.J. Almeida},
  title       = {System Prototyping in \textsc{Camila}},
  institution = {University of Minho},
  number      = {DI-CAM-95:11:1},
  year        = {1995},
  note        = {Lecture notes for the System Design Course,
                 Computer System Engineering, University of Bristol},
  url         = {http://www.di.uminho.pt/~lsb/pub_camila/LNcam.ps.gz},
  keyword     = {Camila, formal specification},
}

% Technical report DI-CAM-95:11:2: the Camila reference manual.
@techreport{Barbosa95a,
  author      = {L.S. Barbosa and J.J. Almeida},
  title       = {\textsc{Camila}: A reference Manual},
  institution = {University of Minho},
  number      = {DI-CAM-95:11:2},
  year        = {1995},
  url         = {http://www.di.uminho.pt/~lsb/pub_camila/RMcam.ps.gz},
  keyword     = {Camila},
}

% 1998 revision (number suffix v98) of the Camila prototyping lecture notes.
@techreport{BA97a,
  author      = {L.S. Barbosa and J.J. Almeida},
  title       = {Systems Prototyping in \textsc{Camila}},
  institution = {DI (U. Minho)},
  type        = {{Lecture Notes for the Bristol Course  (1st ed.  1995)}},
  number      = {DI-CAM-95:11:1:v98},
  year        = {1998},
  keyword     = {Formal Methods, Prototyping, Camila},
}

% Technical report DI-CAM-95:7:1 on Camila.
@techreport{Barbosa95b,
  author      = {L.S. Barbosa and J.J. Almeida},
  title       = {Growing Up With \textsc{Camila}},
  institution = umdi,
  number      = {DI-CAM-95:7:1},
  year        = {1995},
  url         = {http://www.di.uminho.pt/~lsb/pub_camila/romantic.ps.gz},
  keyword     = {Camila, formal specification, didatics},
}

% Workshop paper (TWLT10, AMiLP'95) on algebraic specification of documents.
% The day range is concatenated with the standard month macro so the style
% controls how the month name is printed. `docpage` is a non-standard field.
@inproceedings{Ramalho95,
  author    = {J.C. Ramalho and J.J. Almeida and P.R. Henriques},
  title     = {Algebraic Specification of Documents},
  booktitle = {TWLT10 - Algebraic Methods in Language Processing},
  editor    = {A. Nijholt and G. Scollo and R. Steetskamp},
  series    = {Twente Workshop on Language Technology},
  pages     = {55--64},
  address   = {Twente University, Netherlands},
  month     = "6--8~" # dec,
  year      = {1995},
  note      = {AMiLP'95},
  url       = {http://natura.di.uminho.pt/~jj/bib/amilp95.ps.gz},
  docpage   = {http://www.di.uminho.pt/~jcr/projectos/david/ARTIGOS/AMiLP95/amilp95.html},
  keyword   = {PDavid, Camila, SGML},
}

% Conference paper on specifying and processing dictionaries; `address` carries
% the meeting place and year, `year` the publication year.
@inproceedings{Almeida96a,
  author    = {J.J. Almeida},
  title     = {Especificação e tratamento de Dicionários},
  booktitle = {Actas do XI Encontro da Associação Portuguesa de Linguística},
  volume    = {2},
  address   = {Lisboa 1995},
  year      = {1996},
  url       = {http://natura.di.uminho.pt/~jj/pln/etdic.ps.gz},
  keyword   = {perl,morphology,lexical analysis, dictionary},
}

% Conference paper on automatic handling of compound terms; `address` carries
% the meeting place and year, `year` the publication year.
@inproceedings{Ulisses96,
  author    = {Ulisses Pinto and J.J. Almeida},
  title     = {Tratamento automático de termos compostos},
  booktitle = {Actas do XI Encontro da Associação Portuguesa de Linguística},
  volume    = {2},
  address   = {Lisboa 1995},
  year      = {1996},
  url       = {http://natura.di.uminho.pt/~jj/pln/ptc.ps.gz},
  keyword   = {jspell,morphology,lexical analysis, PLN},
}

% Conference paper (1996) on YaLG as a higher-order grammar specification tool.
@inproceedings{Almeida96b,
  author    = {J.J. Almeida and J.B. Barros},
  title     = {{YaLG} a tool for higher-order grammar specification},
  booktitle = {II International Conference on Mathematical Linguistics, Tarragona, Spain},
  year      = {1996},
  url       = {http://natura.di.uminho.pt/~jj/pln/yalg2.ps.gz},
  keyword   = {yalg,DCG,RS},
}

% Journal article on NLlex; the report keyed jj95 carries the same title.
% The month uses the standard macro; the publisher is the Spanish society's name.
@article{jj96,
  author    = {J.J. Almeida},
  title     = {{NLlex} -- a tool to generate lexical analysers for natural language},
  journal   = {Procesamiento del Lenguaje Natural},
  volume    = {19},
  pages     = {81--90},
  month     = sep,
  year      = {1996},
  publisher = {Sociedad Española para el Procesamiento del Lenguaje Natural},
  url       = {http://natura.di.uminho.pt/~jj/pln/nllex2.ps.gz},
  keyword   = {jspell,morphology,lex,PLN,nllex},
}

% Technical report DI-DAV-96:1:1 on turning BibTeX data into HTML semantic nets.
% `docpage` is a non-standard field holding the companion web page.
@techreport{Almeida96c,
  author      = {J.J. Almeida and J.C. Ramalho},
  title       = {From {BibTeX} to {HTML} semantic nets},
  institution = umdi,
  number      = {DI-DAV-96:1:1},
  year        = {1996},
  docpage     = {http://natura.di.uminho.pt/~jcr/bib/dbib.html},
  keyword     = {PDavid, bibtex, librarian studies,html},
}

% Conference paper (SGML'96) comparing two approaches to document semantics.
% The editor is an organisation, so it is double-braced to stop it being split
% into given and family names. `docpage` is a non-standard field.
@inproceedings{Ramalho96,
  author    = {J.C. Ramalho and J.J. Almeida and P.R. Henriques},
  title     = {Document Semantics: two approaches},
  booktitle = {Celebrating a Decade of SGML},
  editor    = {{Graphic Communications Association}},
  pages     = {473--483},
  address   = {Boston, USA},
  month     = nov,
  year      = {1996},
  publisher = {ArborText},
  note      = {SGML'96 Conference},
  docpage   = {http://natura.di.uminho.pt/~jcr/projectos/david/COMS/sgml96/sgml96.html},
  keyword   = {PDavid, Camila, SGML,AG,FS},
}

% Conference paper (SGML/XML'97); the article keyed RRAH99 carries the same title.
% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{SGML97,
  author    = {J.C. Ramalho and J.G. Rocha and J.J. Almeida and P.R. Henriques},
  title     = {{SGML} Documents: where does quality go?},
  booktitle = {SGML/XML'97 Conference},
  address   = {Washington D.C. - USA},
  month     = dec,
  year      = {1997},
  keyword   = {PDavid, SGML, Semantics},
}

% Conference paper on dictionary programming; `address` carries the meeting
% place and year, `year` the publication year.
@inproceedings{Almeida98,
  author    = {J.J. Almeida},
  title     = {Programação de dicionários},
  booktitle = {Actas do XIII Encontro da Associação Portuguesa de Linguística},
  volume    = {1},
  pages     = {21--28},
  address   = {Lisboa 1997},
  year      = {1998},
  url       = {http://natura.di.uminho.pt/~jj/bib/progDic.ps.gz},
  keyword   = {perl,morphology,dictionary,parser},
}

% Conference paper on a morpho-syntactic tagger for Portuguese.
@inproceedings{Reis98,
  author    = {Ricardo Reis and J.J. Almeida},
  title     = {Etiquetador morfo-sintáctico para o Português},
  booktitle = {Actas do XIII Encontro da Associação Portuguesa de Linguística},
  address   = {Lisboa 1997},
  year      = {1998},
  url       = {http://natura.di.uminho.pt/~jj/bib/etiquetador2.ps.gz},
  keyword   = {tagger, PLN},
}

% Conference paper (CLaPF97) on Camila and functional programming.
% The month uses the standard macro so the style decides how it is printed.
@inproceedings{ABNO97a,
  author    = {J.J. Almeida and Barbosa, L.S. and Neves, F.L. and Oliveira, J.N.},
  title     = {\textsc{Camila}: Formal Software Engineering Supported by Functional Programming},
  booktitle = {Proc. II Conf. Latino Americana de Programaci\'on Funcional ({CLaPF97})},
  editor    = {De Giusti, A. and Diaz, J. and Pesado, P.},
  pages     = {1343--1358},
  address   = {La Plata, Argentina},
  month     = oct,
  year      = {1997},
  url       = {http://camila.di.uminho.pt/camila-doc/CLaPF97.ps.gz},
  keyword   = {Camila, formal specification},
}

% Conference paper (AMAST'97) on prototyping and refinement in Camila.
% Publisher, series and series volume are kept in their own fields so the
% style can format them; the month uses the standard macro.
@inproceedings{ABNO97b,
  author    = {J.J. Almeida and Barbosa, L.S. and Neves, F.L. and Oliveira, J.N.},
  title     = {\textsc{Camila}: Prototyping and Refinement of Constructive Specifications},
  booktitle = {6th International Conference on Algebraic Methods and Software Technology ({AMAST'97})},
  editor    = {Johnson, M.},
  series    = {Lecture Notes in Computer Science},
  volume    = {1349},
  pages     = {554--559},
  address   = {Sydney, Australia},
  month     = dec,
  year      = {1997},
  publisher = {Springer},
  keyword   = {Camila, formal specification},
}


% Conference paper (Kes98) on dynamic dictionaries as cooperating information sources.
% The month uses the standard macro so the style decides how it is printed.
@inproceedings{AH97,
  author    = {J.J. Almeida and P.R. Henriques},
  title     = {Dynamic Dictionary = cooperative information sources},
  booktitle = {Proc. II Conference on Knowledge-based Intelligent Electronic Systems ({Kes98})},
  address   = {Australia},
  month     = apr,
  year      = {1998},
  url       = {http://natura.di.uminho.pt/~jj/bib/agentes97.ps.gz},
  keyword   = {dictionary, Agentes},
}

% Conference paper (Museums and the Web 1998); the venue is recorded in `note`.
@inproceedings{museums98,
  author    = {J.G. Rocha and M.R. Henriques and J.C. Ramalho and J.J. Almeida
               and J.L. Faria and P.R. Henriques},
  title     = {Adapting Museum Structures for the Web: No Changes Needed!},
  booktitle = {Museums and the Web 1998},
  year      = {1998},
  note      = {Toronto - Canadá},
}

% Workshop paper (1998) on the development of Camila.
% The day is concatenated with the standard month macro. The fourth author's
% initials follow the form used in the ABNO97a and ABNO97b entries.
% NOTE(review): `publisher` holds a proceedings name, not a publishing house --
% confirm which field it belongs in.
@inproceedings{ABBN98,
  author    = {Almeida, J.J. and Barbosa, L.S. and Barros, J.B. and Neves, F.L.},
  title     = {On The Development of \textsc{Camila}},
  booktitle = {Workshop on Research Themes on Functional Programming},
  editor    = {L.S. Barbosa and J.A. Saraiva},
  month     = "18~" # sep,
  year      = {1998},
  publisher = {Proc. 3rd Summer School on Advan. Funct. Prog., Braga},
}

% Journal article (1998); shares title, url and docpage with the workshop
% paper keyed Ramalho95. `docpage` is a non-standard field.
@article{Ramalho98,
  author  = {J.C. Ramalho and J.J. Almeida and P.R. Henriques},
  title   = {Algebraic specification of documents},
  journal = {Theoretical Computer Science},
  volume  = {199},
  pages   = {231--247},
  year    = {1998},
  url     = {http://natura.di.uminho.pt/~jj/bib/amilp95.ps.gz},
  docpage = {http://www.di.uminho.pt/~jcr/projectos/david/ARTIGOS/AMiLP95/amilp95.html},
  keyword = {PDavid, Camila, SGML},
}

% Conference paper (AGILE, 1999) on formal methods for GI systems development.
@inproceedings{Gis99,
  author    = {Jorge Rocha and Ana Silva and Ricardo Henriques and J.J. Almeida and Pedro Henriques},
  title     = {Formal Methods for {GI} Systems Development},
  booktitle = {Conferência da Association of Geographic Information
               Laboratories for Europe (AGILE)},
  address   = {Roma},
  year      = {1999},
  keyword   = {GIS},
}

% Conference paper (AGILE, 1999) on MAPit, a tool set for generating HTML maps.
@inproceedings{RPA99,
  author    = {Jorge Rocha and Tiago Pedroso and J.J. Almeida},
  title     = {{MAPit} - A tool set for automatically generation of {HTML} Maps},
  booktitle = {Conferência da Association of Geographic Information
               Laboratories for Europe (AGILE)},
  address   = {Roma},
  year      = {1999},
  keyword   = {GIS, XML, mapit},
}

% Conference paper (GISBRASIL'99) on using formal methodologies in GIS development.
@inproceedings{RSea99,
  author    = {Jorge Gustavo Rocha and Ana Silva and J.J. Almeida and Mario Ricardo Henriques and Pedro Rangel Henriques},
  title     = {Sobre a Utilização de Metodologias Formais no Desenvolvimento de {SIG}},
  booktitle = {GISBRASIL'99, Salvador},
  year      = {1999},
  keyword   = {GIS},
}

% Conference paper (XML-Europe'99) on the XML::DT module.
% The language name is brace-protected against sentence-casing; the month uses
% the standard macro.
@inproceedings{xmldt99,
  author    = {J.J. Almeida and José Carlos Ramalho},
  title     = {{XML::DT} a {Perl} Down-Translation module},
  booktitle = {XML-Europe'99, Granada - Espanha},
  month     = may,
  year      = {1999},
  keyword   = {XML, perl},
}

% Journal article (1999); the conference paper keyed SGML97 carries the same title.
% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@article{RRAH99,
  author    = {J.C. Ramalho and J.G. Rocha and J.J. Almeida and P.R. Henriques},
  title     = {{SGML} documents: Where does quality go?},
  journal   = {Markup Languages: theory and practice},
  volume    = {1},
  pages     = {75--90},
  year      = {1999},
  publisher = {MIT Press},
  keyword   = {SGML},
}

% Conference paper (SBLP'2000) on polytypic recursion patterns.
% The month uses the standard macro so the style decides how it is printed.
@inproceedings{Barbosa2000,
  author    = {L.S. Barbosa and J.B. Barros and J.J. Almeida},
  title     = {Polytypic Recursion Patterns},
  booktitle = {{SBLP'2000} (to appear as a ENTCS volume)},
  address   = {{UFP}, Recife, Brasil},
  month     = may,
  year      = {2000},
  keyword   = {FS, Camila},
}

% Conference paper on Smallbook, a command for small-scale book production.
@inproceedings{jj2001x,
  author    = {J.J. Almeida},
  title     = {Smallbook -- comando para produção de livros em pequena escala},
  booktitle = {Actas da II Conferência Internacional de Tecnologias de
               Informação e Comunicação na Educação},
  pages     = {445--450},
  address   = {Braga},
  year      = {2000},
  keyword   = {publishing, latex, smallbook},
}

% Journal article (2001) on text-to-speech using a rewriting system.
% The month uses the standard macro; the publisher is the Spanish society's name.
@article{speaker:sepln2001,
  author    = {J.J. Almeida and A. M. Simões},
  title     = {Text to speech -- a rewriting system approach},
  journal   = {Procesamiento del Lenguaje Natural},
  volume    = {27},
  pages     = {247--255},
  address   = {Sevilha},
  month     = sep,
  year      = {2001},
  publisher = {Sociedad Española para el Procesamiento del Lenguaje Natural},
  keyword   = {TTS,RS,fonética},
}

% Conference paper (2001) on the architecture of the Museu da Pessoa.
% The month uses the standard macro so the style, not the database, picks the
% language and form of the month name.
@inproceedings{mp2001,
  author    = {J.J. Almeida and J. Gustavo Rocha and P. Rangel Henriques and
               Sónia Moreira and Alberto Simões},
  title     = {{Museu da Pessoa} -- Arquitectura},
  booktitle = {Congresso Nacional de Bibliotecários, Arquivistas e
               Documentalistas},
  address   = {Porto},
  month     = may,
  year      = {2001},
  url       = {http://natura.di.uminho.pt/~jj/bib/museuDaPessoa2001.ps.gz},
}

% Conference paper (2001) on Alfarrábio and a heterogeneous site collection.
% The month uses the standard macro so the style, not the database, picks the
% language and form of the month name.
@inproceedings{alfarrabio2001,
  author    = {J.J. Almeida and P. Rangel Henriques and J. Gustavo Rocha and
               Alberto Simões},
  title     = {Alfarrábio: Adding value to an Heterogeneous Site Collection},
  booktitle = {Congresso Nacional de Bibliotecários, Arquivistas e
               Documentalistas},
  address   = {Porto},
  month     = may,
  year      = {2001},
  url       = {http://natura.di.uminho.pt/~jj/bib/alfarrabio2001.ps.gz},
}

% Conference paper on computing word frequencies for dictionary entries using
% morphological analysers, taggers and corpora together.
@inproceedings{freq2002,
  author    = {Paulo A. Rocha and Alberto M. Simões and J.J. Almeida},
  title     = {Cálculo de frequências de palavras para entradas de dicionários
               através do uso conjunto de analisadores morfológicos, taggers e
               corpora},
  booktitle = {Actas do XVII Encontro da Associação Portuguesa de Linguística},
  pages     = {407--418},
  address   = {Lisboa 2001},
  year      = {2002},
  url       = {http://natura.di.uminho.pt/~jj/bib/apl:freqnormpt.ps.gz},
  abstract  = {Apresentamos neste documento uma possível abordagem à
                extracção de frequências de palavras a partir de
                corpora, baseada numa utilização cooperativa de várias
                ferramentas de Processamento de Linguagem Natural.},
}

% Conference paper on Jspell.pm, a Perl module wrapping the jspell
% morphological analyser.
@inproceedings{jspell2002,
  author    = {Alberto M. Simões and J.J. Almeida},
  title     = {Jspell.pm -- um módulo de análise morfológica
               para uso em processamento de linguagem natural},
  booktitle = {Actas do XVII Encontro da Associação Portuguesa de Linguística},
  pages     = {485--495},
  address   = {Lisboa 2001},
  year      = {2002},
  abstract  = {Neste documento é nosso propósito apresentar as
                características presentes no analisador morfológico
                jspell e quais as suas consequências ao nível de
                aplicações de processamento de linguagem natural. Como
                ferramenta que é frequentemente integrada em software
                mais específico, apresentamos um módulo Perl
                desenvolvido com o objectivo de facilitar a interligação
                do analisador morfológico com pequenas aplicações
                desenvolvidas em linguagens de scripting. Devido à
                constante necessidade de melhoramento de dicionários, e
                em particular dos analisadores morfológicos, discutimos
                as propriedades que estes devem conter para facilitar o
                seu processamento e enriquecimento automático.},
}

% Conference paper (2002) on directory attribute grammars.
@inproceedings{dag2002,
  author    = {Alberto M. Simões and J.J. Almeida and Pedro R. Henriques},
  title     = {Directory Attribute Grammars},
  booktitle = {VI Simpósio Brasileiro de Linguagens de Programação},
  pages     = {297--308},
  address   = {Rio de Janeiro, Brasil},
  year      = {2002},
}

% Conference paper (Elpub 2002) on Library::*, a toolkit for digital libraries.
% The month uses the standard macro; the address gives the town's full name.
@inproceedings{elpub2002,
  author    = {Alberto M. Simões and J.J. Almeida},
  title     = {Library::* -- a toolkit for digital libraries},
  booktitle = {Elpub 2002 -- International Conference on Electronic Publishing},
  pages     = {203--211},
  address   = {Karlovy Vary, República Checa},
  month     = nov,
  year      = {2002},
  isbn      = {3-89700-357-0},
  abstract  = {
  In last years the amount of digital documents has increased
  dramatically. Unfortunately the same did not occur with the
  structure and organization of the information. Traditionally we
  built a digital library using a catalog with documents'
  meta-information including a conceptual classification and an
  ontology of concepts.
  
  In this document we present a set of modules to help in the task of
  building and maintaining a digital library. It includes a module to
  work with ontologies, a set of modules to handle specific catalog
  formats (like Bib\TeX), a module to define new catalog formats
  and a tool to integrate ontologies and multi-format
  catalogs in a web browse-able knowledge-base.
 },
}

% Journal article (2002) on mining parallel corpora from the web.
% The month uses the standard macro; the publisher is the Spanish society's name.
@article{parguess2002,
  author    = {J.J. Almeida and Alberto M. Simões and J. Alves de Castro},
  title     = {Grabbing parallel corpora from the web},
  journal   = {Procesamiento del Lenguaje Natural},
  volume    = {29},
  pages     = {13--20},
  month     = sep,
  year      = {2002},
  publisher = {Sociedad Española para el Procesamiento del Lenguaje Natural},
  abstract  = {
  Multilingual resources  are useful for linguistic studies, translation, 
and many other tasks. Unfortunately, these resources are difficult to obtain
and organize. 
  In this document we describe a set of tools designed to help in the
task of mining bilingual resources from the web, from a specific site, 
from a file system, from a list of URLs, or from a translation memory.
  As a design goal we intend to build tools that can be used both
cooperatively (in pipeline)  and also in a independent way.  },
}

% Magazine article (The Perl Review, 2002) on combining Parse::Yapp with flex.
% The language name is brace-protected against sentence-casing; the month uses
% the standard macro.
@article{cP,
  author   = {Alberto Manuel Simões},
  title    = {Cooking {Perl} with flex},
  journal  = {The Perl Review},
  volume   = {0},
  number   = {3},
  month    = may,
  year     = {2002},
  abstract = {

     There are a lot of tools for parser generation using Perl. As we
     know, Perl has flexible data structures which makes it easy to
     generate generic trees. While it is easy to write a grammar and a
     lexical analyzer using modules like \texttt{Parse::Yapp} and
     \texttt{Parse::Lex}, this pair of tools is not as efficient as I
     would like. In this document I'll present a way to cook quickly
     \texttt{Parse::Yapp} with the better lexical analyzer I know:
     \texttt{flex}.
  },
}
% Conference paper on extracting parallel corpora from the web.
% `lang` is a non-standard field recording the paper's language.
@inproceedings{APL2k2.Parguess,
  author    = {J.J. Almeida and Alberto Manuel Simões and José Alves Castro},
  title     = {Extracção de corpora paralelo a partir da web: construção e
               disponibilização},
  booktitle = {Actas do XVIII Encontro da Associação Portuguesa de Linguística},
  address   = {Porto 2002},
  year      = {2003},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/APL2k2.Parguess.pdf},
  lang      = {PT},
  abstract  = {
   Ao longo deste documento descrever-se-á um conjunto de ferramentas
   construídas para extracção automática de recursos bilingues a partir
   da Web, a partir de um \emph{site} específico, a partir de um
   sistema de ficheiros contendo alguns textos que sejam traduções de outros,
   ou ainda a partir de memórias de tradução.

   Neste trabalho apresenta-se todo o processo de construção de corpora
   paralelo desde os algoritmos de minagem dos dados (data mining) até à
   construção de vários tipos de recursos bilingues incluindo a construção
   automática de corpus paralelos pesquisáveis na Internet.
  },
}

% Conference paper on generating synthetic speech with accents.
% `lang` is a non-standard field recording the paper's language.
@inproceedings{APL2k2.Synthesis,
  author    = {J.J. Almeida and Alberto Manuel Simões},
  title     = {Geração de voz com sotaque},
  booktitle = {Actas do XVIII Encontro da Associação Portuguesa de Linguística},
  address   = {Porto 2002},
  year      = {2003},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/APL2k2.Synthesis.pdf},
  lang      = {PT},
  abstract  = {
   Como é sabido os sotaques podem estar ligados a uma zona geográfica,
   a um grupo social, podem até ser uma característica pessoal.  O seu
   estudo e descrição tem interessado muitos investigadores embora
   normalmente esse estudo tem sido feito de modo pouco formal.

   No trabalho que aqui se relata, tentou-se descrever formalmente
   sotaques e disfunções através de criação de regras a integrar como
   variantes num gerador de voz.  Deste modo, pretendeu-se criar um
   ambiente de experimentação dos modelos construídos para descrever
   algumas características de certos sotaques ou certas disfunções, de
   modo a permitir a sua validação.

   Constatou-se que se consegue obter certas disfunções e certos
   sotaques com facilidade por simples acrescento de regras opcionais
   em certas fases da geração da voz. Outros, aparentam ser de maior
   dificuldade, ou por não conhecermos suficiente bem os fenómenos
   neles envolvidos ou envolverem maior complexidade prosódica.
  },
}

% Conference paper (XATA, 2003) on reverse engineering HTML with XML technology.
% `lang` and `irreditor` are non-standard fields.
% NOTE(review): `irreditor` looks like a deliberately disabled `editor` -- confirm.
@inproceedings{xata:xmldt,
  author    = {J.J. Almeida and Alberto Manuel Simões},
  title     = {Engenharia reversa de {HTML} usando tecnologia {XML}},
  booktitle = {{XATA --- XML}, Aplicações e Tecnologias Associadas},
  irreditor = {José Carlos Ramalho},
  year      = {2003},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/xata2003xml.pdf},
  lang      = {PT},
  abstract  = { O proliferar de ferramentas criadores de HTML e o uso
                  de HTML guiado pelo aspecto, tem vindo a arruinar o
                  seu lado conceptual. Este problema foi reconhecido e
                  deu origem a vários formatos ou tecnologias com o
                  objectivo de separar o aspecto do conceito.  No
                  entanto a realidade actual mostra uma enorme
                  quantidade de páginas HTML com péssima leitura
                  conceptual e estrutural, invalidando uma série de
                  usos possíveis da informação nelas contida.  Nesta
                  comunicação apresenta-se um trabalho (em fase
                  inicial) que pretende fazer engenharia reversa de
                  HTML para permitir aumentar a sua acessibilidade, a
                  fim de ser usada num \emph{browser} para invisuais.
                  },
}

% Conference paper (XATA, 2003) on the current architecture of the Museu da Pessoa.
% Capitalised title words are protected as whole words rather than by bracing
% only their first letter. `lang` is a non-standard field.
@inproceedings{xata:museudapessoa,
  author    = {Alberto Manuel Simões and J.J. Almeida},
  title     = {{Histórias} de {Vida} + {Processamento} {Estrutural} = {Museu}
               da {Pessoa}},
  booktitle = {{XATA --- XML}, Aplicações e Tecnologias Associadas},
  editor    = {José Carlos Ramalho},
  year      = {2003},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/xata2003mp.pdf},
  lang      = {PT},
  abstract  = {
   Este artigo apresenta a arquitectura actual do Museu da Pessoa,
   contemplando a forma como os documentos estão a ser editados,
   catalogados, arquivados, e processados para a criação das estruturas
   necessárias ao Museu.
},
}

% Conference paper (ElPub 2003) on a workbench for music publishing.
% Further editors are expressed with the "and others" token so the style prints
% "et al."; the month uses the standard macro. `lang` is a non-standard field.
@inproceedings{elpub2003,
  author    = {Alberto Manuel Simões and J.J. Almeida},
  title     = {Music publishing},
  booktitle = {ElPub 2003 -- International conference on electronic publishing},
  editor    = {Sely Costa and others},
  pages     = {288--298},
  month     = jun,
  year      = {2003},
  publisher = {Universidade do Minho},
  note      = {Guimarães},
  isbn      = {972-98921-2-1},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/elpub2003.pdf},
  keyword   = {música, bibliotecas digitais},
  lang      = {EN},
  abstract  = {
     Current music publishing in the Internet is mainly concerned with
  sound publishing. We claim that music publishing is not only to make
  sound available but also to define relations between a set of music
  objects like music scores, guitar chords, lyrics and their
  meta-data. We want an easy way to publish music in the Internet, to
  make high quality paper booklets and even to create Audio CD's.
  In this document we present a workbench for music publishing based
  on open formats, using open-source tools and script programming over
  them.  The workbench is based on an archive specification written in
  a text-based format which includes sound references, music scores,
  chords and lyrics and their meta-information.
  },
}

% Workshop paper (CP3A 2003) presenting the TerminUM project.
% The month uses the standard macro so the style decides how it is printed.
@inproceedings{cp3a:terminum2003,
  author    = {J.J. Almeida and Alberto Simões and José Castro and Bruno
               Martins and Paulo Silva},
  title     = {Projecto {TerminUM}},
  booktitle = {CP3A 2003 -- Workshop em Corpora Paralelos: aplicações e
               algoritmos associados},
  pages     = {7--14},
  month     = jun,
  year      = {2003},
  publisher = {Universidade do Minho},
  note      = {Braga},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/cp3a2003-terminum.pdf},
  keyword   = {terminum, parallel corpora},
  abstract  = { O projecto TerminUM tem como objectivos principais o
                  estudo, experimentação e a criação de recursos na
                  área dos corpora paralelos, terminologia
                  (descritiva) e recursos multilingues ligados a
                  corpora: fazer extracção tão automática quanto
                  possível de corpora a partir da web; fazer extracção
                  de dicionários, de terminologia e de outros recursos
                  ligados à tradução; criar e interligar as
                  ferramentas desenvolvidas; criar e disponibilizar:
                  (1) listas de Bitextos, corpora e corpora paralelos,
                  (2) ferramentas de criação e transformação de
                  corpora, (3) recursos multilingues derivados/ligados
                  a corpora.  Nesta apresentação serão abordadas
                  algumas tarefas presentemente a decorrer no âmbito
                  do projecto, nomeadamente: ciclo de vida da
                  construção e transformação de corpora; resumo das
                  ferramentas desenvolvidas (e em desenvolvimento);
                  construção de corpora paralelos tomando como base
                  legendas de filmes (subtitles), ficheiro de
                  internacionalização (mensagens de software .po) e
                  ficheiros de memórias de tradução (TMX); animação de
                  corpora paralelos via web (criação de motores de
                  consulta usando diversas ferramentas).  },
}

% Workshop paper (CP3A 2003) on Lingua-Biterm, a module for bilingual
% terminology extraction. The language name is brace-protected against
% sentence-casing; the month uses the standard macro.
@inproceedings{cp3a:kvec2003,
  author    = {Bruno Martins},
  title     = {{Lingua-Biterm}: um módulo {Perl} para extracção de terminologia bilingue},
  booktitle = {CP3A 2003 -- Workshop em Corpora Paralelos: aplicações e
               algoritmos associados},
  pages     = {65--70},
  month     = jun,
  year      = {2003},
  publisher = {Universidade do Minho},
  note      = {Braga},
  keyword   = {kvec, terminum, parallel corpora, word alignment},
}

% Workshop paper (CP3A 2003) on aligning parallel corpora.
% The month uses the standard macro so the style decides how it is printed.
@inproceedings{cp3a:natools2003,
  author    = {Alberto Simões},
  title     = {Alinhamento de corpora paralelos},
  booktitle = {CP3A 2003 -- Workshop em Corpora Paralelos: aplicações e
               algoritmos associados},
  pages     = {71--77},
  month     = jun,
  year      = {2003},
  publisher = {Universidade do Minho},
  note      = {Braga},
  keyword   = {natools, terminum, parallel corpora, word alignment},
}

% Journal article (2003) on NATools, a statistical word aligner workbench.
% The month uses the standard macro; the publisher is the Spanish society's name.
@article{sepln2003,
  author    = {Alberto M. Simões and J.J. Almeida},
  title     = {{NATools} -- A Statistical Word Aligner Workbench},
  journal   = {Procesamiento del Lenguaje Natural},
  volume    = {31},
  pages     = {217--224},
  month     = sep,
  year      = {2003},
  publisher = {Sociedad Española para el Procesamiento del Lenguaje Natural},
  keyword   = {natools, terminum, parallel corpora, word alignment},
  abstract  = {This document presents the TerminUM project and the
  work done in its statistical word aligner workbench (NATools). It
  shows a variety of alignment methods for parallel corpora and
  discusses the resulting terminological dictionaries and their use:
  evaluation of sentence translations; construction of a multi-level
  navigation system for linguistic studies or statistical
  translations. },
}

% Doctoral thesis (2003) on dynamic multi-source dictionaries.
% `superviser` and `lang` are non-standard fields.
@phdthesis{tesejj,
  author     = {José João Dias de Almeida},
  title      = {Dicionários dinâmicos multi-fonte},
  school     = {Universidade do Minho},
  type       = {Tese de Doutoramento},
  year       = {2003},
  superviser = {Pedro Rangel Henriques},
  url        = {http://natura.di.uminho.pt/~jj/bib/tesejj.pdf},
  lang       = {PT},
}

% Master's thesis (2004) on parallel corpora word alignment.
% `superviser` and `lang` are non-standard fields.
@mastersthesis{teseambs,
  author     = {Alberto Manuel Brandão Simões},
  title      = {Parallel Corpora word alignment and applications},
  school     = {Escola de Engenharia - Universidade do Minho},
  type       = {Tese de Mestrado},
  year       = {2004},
  superviser = {José João Almeida and Pedro Rangel Henriques},
  url        = {http://alfarrabio.di.uminho.pt/~albie/publications/msc.pdf},
  lang       = {EN},
}

% Conference paper (XATA 2004) on TX, XML validation based on dynamic types.
% The booktitle dash matches the sibling entry xata04:mtd; the capitalised
% title word is protected whole; the month uses the standard macro.
% `lang` and `irreditor` are non-standard fields.
% NOTE(review): `irreditor` looks like a deliberately disabled `editor` -- confirm.
@inproceedings{xata04:tx,
  author    = {José João Almeida and Alberto Simões},
  title     = {{TX} --- {Validação} de {XML} baseada em tipos dinâmicos},
  booktitle = {{XATA 2004} --- XML, Aplicações e Tecnologias Associadas},
  irreditor = {José Carlos Ramalho and Alberto Simões},
  pages     = {217--224},
  month     = feb,
  year      = {2004},
  isbn      = {972-99166-0-8},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/xata04-tx.pdf},
  lang      = {PT},
  abstract  = {
  Desde o advento do SGML e posteriormente do XML, que a validação de
  documentos tem sido focada.
  
  Esta validação surgiu para analisar a estrutura dos documentos SGML
  e XML usando DTDs.  Além dessa, e devido às restrições do XML em
  relação ao SGML, a validação de XML bem formado também tem sido
  usada.  Mais recentemente, os Schema e Schematron vieram permitir a
  validação a um nível superior: não só a estrutura do documento mas
  também alguma validação de conteúdo.
  
  Neste artigo apresentamos a ferramenta TX que visa outro nível de
  validação, em que os tipos possam ser mais ricos e/ou calculados
  dinamicamente, e onde se possa definir funções de anotação e/ou
  correcção das porções do documento que não sigam as especificações.
  },
}



% XATA 2004 paper. The third author's compound surname "Gómez Guinovart" is
% given in "Last, First" form so BibTeX does not treat "Gómez" as a given name;
% the spelling matches the sepln2004 entry.
@InProceedings{xata04:mtd,
  author =       {Alberto Simões and José João Almeida and
                  Gómez Guinovart, Xavier},
  title =        {Memórias de Tradução Distribuídas},
  booktitle =    {{XATA 2004} --- XML, Aplicações e Tecnologias Associadas},
  pages =        {59--68},
  year =         {2004},
  lang      = "PT",
  url = {http://alfarrabio.di.uminho.pt/~albie/publications/xata04-mtd.pdf},
  isbn =         {972-99166-0-8},
  irreditor =       {José Carlos Ramalho and Alberto Simões},
  month =        {February},
  abstract = {
  Neste documento apresenta-se o conceito de memórias de tradução
  distribuídas, discutindo-se o seu interesse na área da tradução, bem
  como as vantagens que uma ferramenta de tradução pode tirar do seu
  uso.
  
  É apresentada uma possível implementação de memórias de tradução
  distribuídas usando WebServices numa arquitectura de cooperativismo.
  São definidos as mensagens (API) que um serviço deste género deve
  implementar para que uma ferramenta de tradução possa tirar partido
  da colaboração entre tradutores.
  },
}

% Article in The Perl Review. Both occurrences of the acronym XML are braced so
% sentence-casing styles cannot lowercase them.
@article{xmldt2,
  author = {Alberto Simões},
  title = {{XML::DT} - Down-Translating {XML}},
  journal = {The Perl Review},
  number = 1,
  volume = 1,
  year = 2004,
}

% Journal article (Procesamiento del Lenguaje Natural, vol. 33). The compound
% surname "Gómez Guinovart" is given in "Last, First" form so the name parser
% keeps both words in the surname.
@article{sepln2004,
  author    = {Alberto Simões and Gómez Guinovart, Xavier and J.J. Almeida},
  title     = {Distributed Translation Memories implementation using WebServices},
  publisher = "Sociedad Española para el Procesamiento del Lenguaje Natural",
  journal   = {Procesamiento del Lenguaje Natural},
  pages     = {89--94},
  volume    = {33},
  month     = {July},
  year      = {2004},
  lang      = {EN},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/dtm-sepln.pdf},
  keyword   = {TMs, MT, distributed translation memories, WebServices, CAT},
  abstract  = { Translation Memories are very useful for translators
  but are difficult to share and reuse in a community of translators.
  This article presents the concept of Distributed Translation
  Memories, where all users can contribute and sharing translations.
  Implementation details using WebServices are shown, as well as an
  example of a distributed system between Portugal and Spain.},
}

% Linguateca overview paper (IBERAMIA 2004 workshop). The `editor` value is an
% event name, not a person, so it is double-braced to stop the name parser from
% splitting it. `lang` is PT because the title and abstract are in Portuguese.
@inproceedings{linguateca,
 author = {Diana Santos and Alberto Simões and Ana Frankenberg-Garcia and 
   Ana Pinto and Anabela Barreiro and Belinda Maia and Cristina Mota and Débora
   Oliveira and Eckhard Bick and Elisabete Ranchhod and J.J. Almeida
   and Luís Cabral and Luís Costa and Luís Sarmento and Marcirio Chaves and Nuno
   Cardoso and Paulo Rocha and Rachel Aires and Rosário Silva and Rui Vilela and
   Susana Afonso},
 title = {Linguateca: um centro de recursos distribuído para o processamento
   computacional da língua portuguesa},
 year = 2004,
 booktitle = {Workshop on Linguistic Tools and Resources for Spanish and
   Portuguese},
 editor = {{IBERAMIA 2004}},
 lang = "PT",
 pages="147--154",
 address="Puebla, México",
 url = {http://alfarrabio.di.uminho.pt/~albie/publications/linguateca.pdf},
 abstract = {
   Neste artigo apresentamos uma panorâmica da actividade da Linguateca na criação
   e disponibilização de recursos e ferramentas para a língua portuguesa.  Começamos
   por uma descrição dos objectivos e pressupostos da Linguateca e uma breve história
   da sua intervenção, e finalizamos com algumas considerações sobre a melhor forma
   de prosseguir na organização da área.
 },
}


% XATA 2005 paper. The city is stored in `address` (the field the other
% conference entries in this file use; classic styles ignore `location`), and
% the month is spelled out in English like the rest of the file.
@inproceedings{xata05:fs,
    author="Rui Vilela and Alberto Simões and Eckhard Bick and J.J. Almeida",
    title="Representação em {XML} da {Floresta} {Sintáctica}",
    month="February",
    year=2005,
    irreditor="José Carlos Ramalho and Alberto Simões and João Correia Lopes",
    booktitle="XATA 2005, Aplicações e Tecnologias Associadas",
    publisher="Departamento de Informática, Universidade do Minho",
    address="Braga",
    keyword = {XML, Floresta Sintáctica, tigerXML, Lingua::PT::Dirty},
}

% XATA 2005 paper. The city is stored in `address` (the field the other
% conference entries in this file use; classic styles ignore `location`), and
% the month is spelled out in English like the rest of the file.
@inproceedings{xata05:tdt,
    author="J.J. Almeida and Alberto Simões",
    title="Inferência de tipos em documentos {XML}",
    month="February",
    year=2005,
    irreditor="José Carlos Ramalho and Alberto Simões and João Correia Lopes",
    booktitle="XATA 2005, Aplicações e Tecnologias Associadas",
    publisher="Departamento de Informática, Universidade do Minho",
    address="Braga",
    keyword = {XML, XML::DT},
}

% XATA 2006 poster on the Navegante proxy. Title typo fixed ("intrusiva");
% month spelled out in English like the rest of the file.
@inproceedings{xata06:navegante,
    author="J.J. Almeida and Alberto Simões",
    title="Navegante: um proxy de ordem superior para navegação intrusiva",
    month="February",
    year=2006,
    irreditor="José Carlos Ramalho and Alberto Simões and João Correia Lopes",
    booktitle="XATA 2006, Aplicações e Tecnologias Associadas",
    publisher="ESTGP",
    address =      {Portalegre},
    note="poster",
    pages="376--377",
    keyword = {XML, XML::DT, HTML},
}

% XATA 2006 paper on XML::Writer::Simple. Month spelled out in English like the
% rest of the file; whole words (not single letters) are brace-protected.
@inproceedings{xata06:xmlauto,
  author =       "J.J. Almeida and Alberto Simões",
  title =        {Geração dinâmica de {API}s {Perl} para criação de {XML}},
  month =        "February",
  year =         2006,
  irreditor =    "José Carlos Ramalho and Alberto Simões and João Correia Lopes",
  booktitle =    {{XATA 2006} --- 4ª Conferência Nacional em XML, Aplicações e Tecnologias Aplicadas},
  publisher =    "ESTGP",
  url = {http://alfarrabio.di.uminho.pt/~albie/publications/xata2006-xmlwritersimple.pdf},
  address =      {Portalegre},
  pages =        "307--314",
  keyword =      {XML, XML::DT, HTML},
  isbn =         {972-99166-2-4},
  lang =         "PT",
  abstract =     {
  É consensual que o XML como linguagem para a estruturação de documentos
  tem vindo a tomar um lugar relevante. É também evidente a vantagem
  obtida no uso de XML como linguagem de intercâmbio.
  No entanto, a sua sintaxe é
  demasiado descritiva pelo que a geração de documentos de forma
  manual é dolorosa sendo útil dispor de módulos
  que simplifiquem essa tarefa.

  Neste artigo propomos um módulo Perl (XML::Writer::Simple) configurável via
  DTD que simplifica a tarefa de gerar XML.
  },
}

% Journal article on NatServer (Procesamiento del Lenguaje Natural, vol. 37).
@article{sepln06,
  author   = {Alberto Simões and J. João Almeida},
  title    = {{NatServer:} A Client-Server Architecture for building Parallel Corpora applications},
  journal  = {Procesamiento del Lenguaje Natural},
  volume   = {37},
  pages    = {91--97},
  month    = {September},
  year     = {2006},
  address  = {Zaragoza, Spain},
  url      = {http://alfarrabio.di.uminho.pt/~albie/publications/sepln06.pdf},
  lang     = {EN},
  abstract = {Parallel corpora are important resources for most
                  Natural Language processing tasks. From the common
                  applications, like machine translation, to the
                  usually mono-lingual tasks as paraphrase detection
                  and word sense disambiguation, most researchers are
                  using massive parallel corpora.  Thus, the
                  availability of an efficient way to manage them is
                  very important.  This paper presents a Client-Server
                  architecture to query efficiently parallel corpora
                  and probabilistic translation dictionaries.},
}

% EAMT 2006 paper. The first editor is written "Last, First" with the slashed o
% as a braced special character: the previous form had a space inside the
% surname, so the parser split it there.
@inProceedings{eamt06,
  author =       {Alberto Simões and J. João Almeida},
  title =        {Combinatory Examples Extraction for Machine Translation},
  shortin = {{EAMT}},
  year =         {2006},
  url = {http://alfarrabio.di.uminho.pt/~albie/publications/eamt06.pdf},
  booktitle = {11th Annual Conference of the European Association for Machine Translation},
  editor =     {L{\o}nning, Jan Tore and Stephan Oepen},
  address =   {Oslo, Norway},
  pages =     {27--32},
  month =     {19--20, June},
  isbn =      {82-7368-294-3},
  lang = {EN},
  abstract = { One of the bottlenecks of example-based machine
                  translation (EBMT) is to be able to amass
                  automatically quantities of good examples.  In our
                  work in EBMT, we are investigating how far one can
                  go by performing example extraction from parallel
                  corpora using Probabilistic Translation Dictionaries
                  to obtain example segmentation points.  In fact, the
                  success of EBMT highly depends on examples quality
                  and quantity, but also in their length. Thus, we
                  give special importance on methods to extract
                  different size examples from the same translation
                  unit.  With this article we show that it is possible
                  to extract quantities for examples from parallel
                  corpora just using probabilistic translation
                  dictionaries extracted from the same corpora.},
}


% LREC 2006 paper introducing the T2O workbench. `shortin` and `lang` are
% file-local fields.
@InProceedings{lrec06,
  author    = {José João Almeida and Alberto Simões},
  title     = {{$T_2O$} --- Recycling Thesauri into a Multilingual Ontology},
  booktitle = {Fifth international conference on Language Resources and Evaluation, LREC 2006},
  shortin   = {{LREC}},
  address   = {Genova, Italy},
  month     = {May},
  year      = {2006},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/lrec06.pdf},
  lang      = {EN},
  abstract  = {In this article we present $T_2O$ --- a workbench to
                  assist the process of translating heterogeneous
                  resources into ontologies, to enrich and add
                  multilingual information, to help programming with
                  them, and to support ontology publishing. $T_2O$ is
                  an ontology algebra.},
}

% ElPub 2006 poster on publishing the T2O multilingual ontology.
@inProceedings{elpub06-t2o,
  author    = {J. João Almeida and  Alberto Simões },
  title     = {Publishing multilingual ontologies: a quick way of obtaining feedback},
  booktitle = {{ElPub 2006} --- Digital Spectrum: Integrating Technology and Culture},
  address   = {Bansko, Bulgaria},
  pages     = {373--374},
  month     = {June},
  year      = {2006},
  note      = {poster},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/elpub06-t2o.pdf},
  lang      = {EN},
  abstract  = {Dictionary and Thesaurus are valuable resources for
                  Natural Language Processing but do not exist as
                  freely available as expected, especially for
                  languages other than English and, when they exist,
                  they are just available for querying online. Our
                  main goal with T2O --- Thesaurus to Ontology
                  framework --- is to create a multilingual ontology:
                  freely available online and to download; with a
                  computer readable format; with a good API; with a
                  structure as rich as possible; reusing all the
                  structured information we can get; },
}



% ElPub 2006 paper on transcoding web pages for blind users. The page range
% uses a double hyphen (en-dash), as in the other entries.
@InProceedings{elpub06-blind,
  author =       {António R. Fernandes and Alexandre Carvalho and J. João Almeida and  Alberto Simões },
  title =        {Transcoding for Web Accessibility for the Blind: Semantics
from Structure},
  shortin = {{ElPub}},
  booktitle = {ElPub 2006 --- Digital Spectrum: Integrating Technology and Culture},
  year =      {2006},
  url  = {http://alfarrabio.di.uminho.pt/~albie/publications/elpub06-blind.pdf},
  address =   {Bansko, Bulgaria},
  month =     {June},
  pages = {123--134},
  abstract = {True accessibility requires minimizing the scanning time
                  to find a particular piece of
                  information. Sequentially reading web pages do not
                  provide this type of accessibility, for instance
                  before the user gets to the actual text content of
                  the page it has to go through a lot of menus and
                  headers. However if the user could navigate a web
                  page based through semantically classified blocks
                  then the user could jump faster to the actual
                  content of the page, skipping all the menus and
                  other parts of the page. We propose a transcoding
                  engine that tackles accessibility at two distinct,
                  yet complementary, levels: for specific known sites
                  and general unknown sites. We present a tool for
                  building customized scripts for known sites that
                  turns this process in an extremely simple task,
                  which can be performed by anyone, without any
                  expertise. For general unknown sites, our approach
                  relies on statistical analysis of the structural
                  blocks that define a web page to infer a semantic
                  for the block.},
   lang = "EN",
}

% ^^^^^ 2007 ^^^^^^^^^^

%InProceedings{gfkl07:topology,
%  author =       {Anália Lourenço and Alberto Simões and Orlando Belo},
%  title =        {Evaluating Web Site Structure based on Navigation Profiles and Site Topology},
%  booktitle =    {The 31st Annual Conference of the German Classification Society on Data Analysis, Machine Learning, and Applications},
%  year =         {2007},
%  note =         {\textbf{forthcoming}},
% }


%InProceedings{gfkl07:terminology,
%  author =       {Alberto Simões and José João Almeida},
%  title =        {Using Alignment Patterns for Bilingual Terminology Extraction},
%  booktitle =    {The 31st Annual Conference of the German Classification Society on Data Analysis, Machine Learning, and Applications},
%  year =         {2007},
%  note =         {\textbf{forthcoming}},
% }


%InProceedings{gfkl07:music,
%  author =       {Alberto Simões and Anália Lourenço and José João Almeida},
%  title =        {Mining Classical Music Scores for Epoch Classification},
%  booktitle =    {The 31st Annual Conference of the German Classification Society on Data Analysis, Machine Learning, and Applications},
%  year =         {2007},
%  note =         {\textbf{forthcoming}},
% }


% Book chapter (pp. 83--90) in the volume edited by Diana Santos; `shortin` is
% a file-local field.
@InCollection{avalon:jspell,
  author    = {José João Almeida and Alberto Simões},
  title     = {Jspellando nas morfolimpíadas: Sobre a participação do {Jspell} nas Morfolimpíadas},
  booktitle = {Avaliação conjunta: um novo paradigma no processamento computacional da língua portuguesa},
  editor    = {Diana Santos},
  publisher = {{IST Press}},
  year      = {2007},
  pages     = {83--90},
  shortin   = {Avaliação conjunta, cap. 8},
}

% Book chapter (pp. 219--230) in the volume edited by Diana Santos; `shortin`
% is a file-local field.
@InCollection{avalon:avalinha,
  author    = {Alberto Simões and José João Almeida},
  title     = {Avaliação de alinhadores},
  booktitle = {Avaliação conjunta: um novo paradigma no processamento computacional da língua portuguesa},
  editor    = {Diana Santos},
  publisher = {{IST Press}},
  year      = {2007},
  pages     = {219--230},
  shortin   = {Avaliação conjunta, cap. 18},
}

% XATA 2007 paper on XML::TMX. Spelling of the third editor's surname corrected
% to "Carriço" (no accent on the i).
@InProceedings{xata07:xmltmx,
  author =       {José João Almeida and Alberto Simões},
  title =        {{XML::TMX} --- Processamento de Memórias de Tradução de Grandes Dimensões},
  shortin = {{XATA}},
  booktitle =    {{XATA 2007} --- 5ª Conferência Nacional em XML, Aplicações e Tecnologias Aplicadas},
  year =         {2007},
  month =        {February},
  url = {http://alfarrabio.di.uminho.pt/~albie/publications/xmltmx07.pdf},
  pages =        {83--93},
  isbn =         {978-972-99166-4-9},
  irreditor =       {José Carlos Ramalho and João Correia Lopes and Luís Carriço},
  abstract =     { As ferramentas de tradução assistida por computador
                  tentam reutilizar as traduções realizadas pelo
                  tradutor sempre que uma frase semelhante tenha sido
                  já traduzida. Para o intercâmbio destes documentos
                  foi definido um formato denominado TMX (Translation
                  Memory Exchange) baseado em XML.  Este tipo de
                  documento ganha facilmente tamanhos incomportáveis
                  para o seu processamento com métodos tradicionais.
                  Neste artigo propomos uma metodologia de ordem
                  superior para o processamento de documentos de
                  estrutura repetitiva (em que se inserem as memórias
                  de tradução) com uma abordagem baseada na conjunção
                  de SAX e DOM.  São apresentados vários exemplos de
                  filtros sobre memórias de tradução bem como um
                  conjunto de medidas da sua eficiência.  },
}

% InProceedings{xata07:xmlyamljson,
%    author =       {Rúben Fonseca and Alberto Simões},
%   title =        {Alternativas ao {XML}: {YAML} e {JSON}},
%   booktitle =    {{XATA 2007} --- 5ª Conferência Nacional em XML, Aplicações e Tecnologias Aplicadas},
%   year =         {2007},
%   month =        {February},
%   url = {http://alfarrabio.di.uminho.pt/~albie/publications/xmlyamljson07.pdf},
%   pages =        {33--46},
%   isbn =         {978-972-99166-4-9},
%   editor =       {José Carlos Ramalho and João Correia Lopes and Luís Carríço},
%   abstract =     { O XML tem sido eleito como a linguagem de anotação por excelência,
% possuindo ao mesmo tempo boas capacidades para serialização de estruturas
% computacionais e transporte de dados independente da plataforma.
% Recentemente porém, novos formatos de dados têm surgido. Alguns deles
% têm tido uma boa aceitação porque resolvem alguns problemas ou limitações
% do XML, sendo em algumas situações um bom complemento ou substituto do mesmo.
% Neste artigo iremos apresentar dois desses formatos de dados - o YAML e o
% JSON - fazendo uma abordagem geral dos mesmos e analisando algumas métricas
% que nos poderão ajudar a decidir se e quando usar estas alternativas.
%  }
% }

% techreport{man:rena,
%   author = "Edgar Alves and José João Almeida",
%   title = "Manual de Utilizador do {RENA}",
%   year = 2006,
%   type = "Manual",
%   month = "Jul",
%   institution = umdi,
%   keyword = "REM, PLN, IR",
%   abstract = {},
%   url = "http://natura.di.uminho.pt/~jj/pln/rena.pdf",
% }

% Euro-Par 2007 paper on Makefile::Parallel. The series name is spelled out in
% full, matching the epia:DruryA09 entry, so the file carries one spelling.
@InProceedings{MP07,
  author =       {Alberto Simões and Rúben Fonseca and José João Almeida},
  title =        {{Makefile::Parallel} Dependency Specification Language},
  booktitle =    {Euro-Par 2007},
  year =         {2007},
  address =      {Rennes, France},
  month =        {August},
  pages =        {33--41},
  editor =       {Anne-Marie Kermarrec and Luc Bougé and Thierry Priol},
  volume =       {4641},
  series =       {Lecture Notes in Computer Science},
  publisher =    {Springer-Verlag},
  abstract =     {  Some processes are not easy to be programmed from scratch
for
  parallel machines (clusters), but can be easily split on simple
  steps. Makefile::Parallel is a tool which lets users to specify how processes
  depend on each other.

  The language syntax resembles the well known Makefile
  makefiles format, but instead of specifying files or targets
  dependencies, Makefile::Parallel specifies processes (or jobs) dependencies.

  The scheduler submits jobs to the cluster scheduler (in our case,
  Rocks PBS) waiting them to end. When each process finishes,
  dependencies are calculated and direct dependent jobs are submitted.

  Makefile::Parallel language includes features to specify parametric rules,
used
  to split and join processes dependencies.  Some tasks can be split
  into n smaller jobs working on different portions of files. At the
  end, another process can be used to join the results.
},
}


% EPIA 2007 paper describing the SysBio Explorer framework. `irreditor`,
% `shortin` and `isbn13` are file-local fields.
@InProceedings{epia-bio-2007,
  author    = { Anália  Lourenço and Alberto  Simões and José João Almeida  and
Miguel  Rocha and  Isabel  Rocha and Eugénio  Ferreira},
  title     = {An Ontology-Based Approach To Systems Biology Literature
Retrieval and Processing},
  booktitle = {New Trends in Artificial Intelligence},
  pages     = {541--552},
  month     = {December},
  year      = {2007},
  isbn13    = {978-989-95618-0-9},
  irreditor = {José Neves and Manuel Filipe Santos and José Manuel Machado},
  shortin   = {Epia, CMBSB},
  abstract  = {This paper details the \emph{SysBio Explorer}, a
                  Systems Biology Literature Retrieval and Processing
                  Framework, whose aim relies on the automatic
                  inference of regulatory and metabolic networks based
                  on biomedical literature. The \emph{SysBio Explorer}
                  does not focus on any organism or problem in
                  particular and encompasses a number of processing
                  and analysis techniques. It works over full-text
                  documents, applying Natural Language Processing
                  techniques and using biomedical dictionaries and
                  ontologies together with hand-made rules. Besides
                  biological entity recognition and relation
                  extraction, document classification, relevance
                  assessment and authoring networks are also within
                  its present scope. The framework is described in
                  terms of its design requirements and implementation
                  decisions, exposing current achievements, but also
                  highlighting present obstacles and future
                  work. Experiments over real-world problems
                  concerning the organisms \emph{E. coli},
                  \emph{S. cerevisiae} and \emph{H. pylori} are used
                  in its validation.},
}

% EPIA 2007 paper on classifying classical music scores with text mining.
% `irreditor` and `shortin` are file-local fields.
@InProceedings{epia-music-2007,
  author    = {Alberto Simões and Anália Lourenço and José João Almeida},
  title     = {Using Text Mining Techniques for Classical Music Scores
Analysis},
  booktitle = {New Trends in Artificial Intelligence},
  pages     = {791--799},
  month     = {December},
  year      = {2007},
  irreditor = {José Neves and Manuel Filipe Santos and José Manuel Machado},
  shortin   = {Epia, TEMA},
  abstract  = {Music Classification is a particular area
                  of Computational Musicology that provides valuable
                  insights about the evolving of composition patterns
                  and assists in catalogue generation. The proposed work
                  detaches from former works by classifying music based
                  on music score information. Text Mining techniques
                  support music score processing while Classification
                  techniques are used in the construction of decision
                  models. Although research is still at its earliest
                  beginnings, the work already provides valuable
                  contributes to symbolic music representation processing 
                  and subsequent analysis. Score processing involved
                  the counting of ascending and descending chromatic
                  intervals, note duration and meta-information
                  tagging. Analysis involved feature selection and
                  the evaluation of several data mining algorithms,
                  ensuring extensibility towards larger repositories or
                  more complex problems. Experiments report the analysis
                  of composition epochs on a subset of the Mutopia project 
                  open archive of classical LilyPond-annotated
                  music scores.  
  },
}


% Chapter on RENA in the HAREM book. The page range uses a double hyphen, and
% the author's initial is separated from the second given name so the name
% parser sees two tokens.
@InCollection{harem:rena,
  author =       {J. João Almeida},
  title =        {{RENA} - Reconhecedor de Entidades},
  booktitle =    {Reconhecimento de entidades mencionadas em português},
  year =         {2007},
  note = {Documentação e actas do HAREM, a primeira avaliação conjunta na
área},
  pages =        {157--172},
  url = {http://acdc.linguateca.pt/aval_conjunta/LivroHAREM/Cap13-SantosCardoso2007-Almeida.pdf},
  ISBN = {978-989-20-0731-1},
  irreditor =       {Diana Santos and Nuno Cardoso},
  publisher = {Linguateca},
  shortin ={{HAREM} cap. XIII},
}

% Journal article on NATools (Procesamiento del Lenguaje Natural, vol. 39).
@Article{sepln07,
  author   = {Alberto Simões and José João Almeida},
  title    = {Parallel Corpora based Translation Resources Extraction},
  journal  = {Procesamiento del Lenguaje Natural},
  volume   = {39},
  pages    = {265--272},
  month    = {September},
  year     = {2007},
  lang     = {EN},
  abstract = {This paper describes NATools, a toolkit to process,
                  analyze and extract translation resources from
                  Parallel Corpora. It includes tools like a
                  sentence-aligner, a probabilistic translation
                  dictionaries extractor, word-aligner, a corpus
                  server, a set of tools to query corpora and
                  dictionaries, as well as a set of tools to extract
                  bilingual resources.},
}

% XATA 2008 paper on CGI::Auto. Abstract typo fixed ("build").
@InProceedings{cgiauto08,
  author =       {Davide Sousa and Alberto Simões and José João Almeida},
  title =        {{CGI::Auto} --- Automatic Web-Service Creation},
  booktitle =    {{XATA 2008} --- 6ª Conferência Nacional em XML, Aplicações e
Tecnologias Aplicadas},
  year =         {2008},
  month =        {February},
  url = {http://alfarrabio.di.uminho.pt/~albie/publications/cgiauto08.pdf},
  pages =        {22--27},
  isbn =         {978-972-99166-5-6},
  irreditor =       {José Carlos Ramalho and João Correia Lopes and Salvador
Abreu},
  abstract =     {   The creation of a CGI or a WebService as an interface for
a command line tool is
not as unusual as it may seem. It is extremely usual and useful.

There are applications developed as command line tools that can be useful for
different purposes,
and different kind of users. Some of these users might not be able to run
these tools directly.
For instance, it
is not easy to install a bunch of Perl modules to have a small tool working.
For these situations, it is easier to make the tool available in the Web or as
a
WebService.

The problem with making the tool available in these fashions, is that
programmers tend to rewrite
the tools to incorporate the CGI or XML specific layers.

We defend that these CGI or WebService interfaces should use the already
available command line
 tool, without any change. This interface should be able to read a simple
textual
specification of how the command line tool works, and build the CGI or XML
specific layers
automatically.

The CGI::Auto module aims this purpose:
to encapsulate command line tools in a CGI layer based on a textual
specification, transforming
the command line tool in a web application.

 },
}


% XATA 2008 paper on the NAVEGANTE framework. Title typo fixed ("Browsing",
% the spelling the abstract itself uses).
@InProceedings{navegante08,
  author =       {Nuno Carvalho and José João Almeida and Alberto Simões},
  title =        {{NAVEGANTE} --- An Intrusive Browsing Framework},
  booktitle =    {{XATA 2008} --- 6ª Conferência Nacional em XML, Aplicações e
Tecnologias Aplicadas},
  year =         {2008},
  month =        {February},
  url = {http://alfarrabio.di.uminho.pt/~albie/publications/navegante08.pdf},
  pages =        {52--63},
  isbn =         {978-972-99166-5-6},
  irreditor =       {José Carlos Ramalho and João Correia Lopes and Salvador
Abreu},
  abstract =     {   NAVEGANTE is a generic framework to build superior order
proxies for
  intrusive browsing. This framework provides the means for developing
  tools that behave as proxies, but perform some processing task on
  the content that is being browsed. Parallel to this content processing,
  applications can also run other user-defined functions with different
  purposes and interfaces, but we'll explain those later. Currently,
  NAVEGANTE only builds applications that run as CGIs, but this is intended
  to change in a near future. Applications are built writing programs in
  NAVEGANTE's Domain Specific Language (DSL).

  NAVEGANTE is a work in progress. This article aims to describe the current
  state of development. What applications can be built and how. Also, we
  identify some implementation problems, and briefly discuss some future
  improvements. Finally, we try to illustrate most of the concepts described
  using a couple of case studies.
 },
}

% Journal article on pattern-based bilingual terminology extraction
% (Procesamiento del Lenguaje Natural, vol. 41).
@Article{sepln08,
  author   = {Alberto Simões and José João Almeida},
  title    = {Bilingual Terminology Extraction based on Translation
Patterns},
  journal  = {Procesamiento del Lenguaje Natural},
  volume   = {41},
  pages    = {281--288},
  month    = {September},
  year     = {2008},
  lang     = {EN},
  abstract = {Parallel corpora are rich sources of translation
    resources. This document presents a methodology for the extraction
of bilingual
    nominals (terminology candidates) from parallel corpora, using
translation patterns.
    The patterns proposed in this work specify the order changes that
occur during translation
    and that are intrinsic to the involved languages syntaxes.
    These patterns are described in a domain specific language
    named PDL (Pattern Description Language), and are extremely
    efficient for the detection of nominal phrases.
  },
}


% PROPOR 2008 special-session paper. The acronym NLP is braced so
% sentence-casing styles cannot lowercase it; stray leading spaces in the
% values are removed.
@inproceedings{propor-apslt08,
  author    = {J. J. Almeida and Alberto Simões},
  title     = {A Textual Rewriting system for {NLP}},
  booktitle = {Applications of Portuguese Speech and Language Technologies, 
               PROPOR 2008 Special session},
  year      = {2008},
  pages     = {35--42},
  irreditor    = {António Teixeira and Daniela Braga},
}

% EPIA 2009 paper (LNCS 5816). The page range uses a double hyphen (en-dash),
% as in the other entries.
@inproceedings{epia:DruryA09,
  author    = {Brett Drury and J. J. Almeida},
  title     = {Construction of a Local Domain Ontology from News Stories},
  booktitle = {EPIA},
  year      = {2009},
  pages     = {400--410},
  url       = {http://dx.doi.org/10.1007/978-3-642-04686-5_33},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {5816},
  note     = {Progress in Artificial Intelligence, EPIA 2009, Aveiro, Portugal, October 12-15},
  editor    = {Luis Seabra Lopes and
               Nuno Lau and
               Pedro Mariano and
               Luis Mateus Rocha},
}

% I Iberian SLTech 2009 paper on marker-based bilingual chunking. Abstract typo
% fixed ("tests have shown").
@InProceedings{markers09,
  author =       {Alberto Simões and José João Almeida},
  title =        {Bilingual Example Segmentation based on Markers
Hypothesis},
  booktitle =   {I Iberian SLTech 2009},
  editor  = { António Teixeira and Miguel Sales Dias and Daniela Braga},
  year =         {2009},
  address = {Porto Salvo, Portugal},
  month = {September, 3--4},
  isbn = {978-989-96278-1-9},
  pages = {95--98},
  lang = {EN},
  abstract = {  The Marker Hypothesis was first defined by Thomas Green
in 1979.  It
  is a psycho-linguistic hypothesis defining that there is a set of
  words in every language that marks boundaries of phrases in a
  sentence. While it remains a hypothesis because nobody has proved
  it, tests have shown that results are comparable to basic shallow
  parsers with higher efficiency.

  The chunking algorithm based on the Marker Hypothesis is simple,
  fast and almost language independent. It depends on a list of
  closed-class words, that are already available for most languages.
  This makes it suitable for bilingual chunking (there is not the
  requirement for separate language shallow parsers).

  This paper discusses the use of the Marker Hypothesis combined
  with Probabilistic Translation Dictionaries for example-based machine
  translation resources extraction from parallel corpora.},
}


%%=============================================== 2010

% XATA 2010 paper on rewriting-based XML processing. The split word "Ob ject"
% in the abstract is repaired, and the month is written in English like the
% other entries.
@InProceedings{xata2010-rewritexml,
  author =       {Alberto Simões and José João Almeida},
  title =        {Processing {XML:} a rewriting system approach},
  booktitle =    {{XATA 2010} --- 8ª Conferência Nacional em XML, Aplicações e
Tecnologias Aplicadas },
  pages =        {27--38},
  year =         {2010},
  editor =       {Alberto Simões and Daniela da Cruz and José Carlos Ramalho},
  address =      {Vila do Conde},
  month =        {May},
  lang =         {EN},
  abstract =     { Nowadays XML processing is performed using one of
                  two approaches: using the SAX (Simple API for XML)
                  or using the DOM (Document Object Model). While
                  these two approaches are adequate for most cases
                  there are situations where other approaches can make
                  the solution easier to write, read and, therefore,
                  to maintain.  This document presents a rewriting
                  approach for XML documents processing, focusing
                  the tasks of transforming XML documents (into other
                  XML formats or other textual documents) and the task
                  of rewriting other textual formats into XML
                  dialects.  These approaches were validated with some
                  case studies, ranging from an XML authoring tool to
                  a dictionary publishing mechanism.  },
}

% NOTE(review): the url of ocr2010 points at a private (10.x.x.x) address and
% will not resolve for outside readers; replace it once a public link is known.
@article{ocr2010,
  author  = {Brett Drury and José João Almeida},
  title   = {A Case Study of Rule Based and Probabilistic Word Error Correction of
             {Portuguese} {OCR} Text in a ``Real World'' Environment for Inclusion
             in a Digital Library},
  journal = {International Journal of Computational Linguistics},
  volume  = {1},
  number  = {1--2},
  pages   = {307--315},
  year    = {2010},
  note    = {presented in {CICLING2010}},
  url     = {http://10.255.0.115/pub/2010/DA10}
}

@InProceedings{lrec10:bigorna,
  author    = {José João Almeida and André Santos and Alberto Simões},
  title     = {{Bigorna} -- A Toolkit for Orthography Migration Challenges},
  booktitle = {Proceedings of the Seventh conference on International Language
               Resources and Evaluation (LREC'10)},
  shortin   = {{LREC}},
  editor    = {Nicoletta Calzolari and others},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Valletta, Malta},
  month     = may,
  date      = {19-21},
  year      = {2010},
  isbn      = {2-9517408-6-7},
  language  = {english}
}

@InProceedings{lrec10:dicaberto,
  author    = {Alberto Simões and José João Almeida and Rita Farinha},
  title     = {Processing and Extracting Data from {Dicionário Aberto}},
  booktitle = {Proceedings of the Seventh conference on International Language
               Resources and Evaluation (LREC'10)},
  shortin   = {{LREC}},
  editor    = {Nicoletta Calzolari and others},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Valletta, Malta},
  month     = may,
  date      = {19-21},
  year      = {2010},
  isbn      = {2-9517408-6-7},
  language  = {english}
}

@InProceedings{bucc2010,
  author    = {José João Almeida and Alberto Simões},
  title     = {Automatic Parallel Corpora and Bilingual Terminology extraction from Parallel WebSites},
  booktitle = {BUCC2010 -- 3rd Workshop on Building and Using Comparable Corpora, lrec2010},
  editor    = {Reinhard Rapp and Pierre Zweigenbaum and Serge Sharoff},
  pages     = {50--55},
  address   = {Valletta, Malta},
  month     = {May},
  year      = {2010},
  lang      = {EN},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/bucc2010.pdf},
  abstract  = { In our days, the notion, the importance and the
                  significance of parallel corpora is so big that needs
                  no special introduction. Unfortunately, public
                  available parallel corpora is somewhat limited in
                  range. There are big corpora about politics or
                  legislation, about medicine and other specific areas,
                  but we miss corpora for other different
                  areas. Currently there is a huge investment on using
                  the Web as a corpus.  This article uncovers GWB, a
                  tool that aims automatic construction of parallel
                  corpora from the web. We defend that it is possible
                  to build high quality terminological corpora in an
                  automatic fashion, just by specifying a sensible
                  Internet domain and using an appropriate set of seed
                  keywords. GWB is a web-spider that works in
                  conjunction with a set of other Open-Source tools,
                  defining a pipeline that includes the documents
                  retrieval from the web, alignment at sentence level
                  and its quality analysis, bilingual dictionaries and
                  terminology extraction and construction of off-line
                  dictionaries.  }
}

@inproceedings{brett:lrec,
  title     = { Identification, extraction and population of collective named
entities from business news},
  author    = {José João Almeida and Brett Drury},
  booktitle = {Entity2010 -- Workshop on Resources and Evaluation for Entity Resolution and Entity
Management, lrec2010},
  address   = {Valletta, Malta},
  month     = {May},
  year      = {2010},
  pages     = {19--22},
  lang      = {EN},
  abstract  = { 
Sentiment analysis of business news has become an increasingly popular
area of research for both the practitioner and academic. The future
financial prospects of companies can be estimated through the aggregation
of sentiment over a period of time. The aggregation of sentiment
for a specific company is only possible if the company is explicitly
mentioned in the news text. In certain instances, news text may refer
to groups or collections of companies, for example "The Automotive
Sector" or "The Russell Group of Universities".  Widely available named
entity dictionaries will not recognize these groups of companies, and
consequently, it may not be possible to assign sentiment attributed
to these groups of companies to their individual members. This paper
describes a method for identifying groups of companies, which for the
purposes of this paper will be known as "Collective Entities". The
described method is corpus based: it uses linguistic patterns to
identify Collective Entity Names, their members and their natural
relations with other Collective Entities. The described methodology
contains the following steps: 1. Identify and validate seed extraction
patterns, 2. Expand seed patterns, 3. Extract and validate Collective
Named Entities, 4. Extract related Collective Named Entities, 5. Construct
and populate an Ontology and 6. Expand the members of Collective Entity
sets with Linked Data. }
}

@inproceedings{fala2010-triPsi,
  title     = {Automating psycholinguistic statistics computation:
Procura-Palavras },
  author    = {João Filipe Machado and José João Almeida and Alberto Simões
and Ana Soares},
  booktitle = {FALA2010 -- II Iberian SLTech Workshop},
  editor    = {Carmen Mateo and Francisco Díaz and Francisco Pazó},
  address   = {Vigo},
  month     = {November},
  year      = {2010},
  pages     = {217--220},
  abstract  = {
This article describes psycholinguistic lexical databases
available in various languages, including English, Spanish and
Portuguese. These lexical databases are important for researchers 
in Psycholinguistics and other related areas, providing
a pool of experimental materials and allowing for an efficient
process of selection of these experimental materials.
The process of gathering statistics is slow, resulting in a
small pool of materials in the short-term. The need to find an
alternative method to gather limited or yet unavailable statistics
for a specific language led us to consider gathering statistics
from other languages and to compute their triangulation. Our
aim was to automatize the computation of statistics such as 
Familiarity, Imageability, Age of Acquisition and Written Word
Frequency for that specific language.
We will describe the process of preparing this data and triangulating and 
comparing statistics for some languages in an attempt of finding a 
relationship between them. The results were
analysed considering correlations between each statistic in each
pair of languages and by computing the mean of absolute differences between 
each language's values.
 }
}

% The people in `editor` are guest editors (presumably of the special issue
% named in `note`). Their role is kept in the ignored `editorrole` field so the
% name list itself contains only names and parses cleanly.
@article{opencert2010,
  author     = {Alberto Simões and Nuno Carvalho and José João Almeida},
  title      = {Testing as a Certification Approach},
  journal    = {Electronic Communications of the EASST},
  volume     = {33},
  year       = {2010},
  editor     = {Luis Barbosa and Antonio Cerone and Siraj Shaikh},
  editorrole = {Guest Eds.},
  note       = {Foundations and Techniques for Open Source Software Certification},
  url        = {http://journal.ub.tu-berlin.de/index.php/eceasst/article/view/458/446}
}

@Article{p-pal-linguamatica,
  author    = {Ana Paula Soares and Montserrat Comesaña and Álvaro Iriarte
               Sanromán and José João Almeida and Alberto Manuel Brandão Simões
               and Ana Costa and Patrícia Cunha França and João Machado},
  title     = {{P-PAL:} {U}ma base lexical com índices psicolinguísticos do
               {P}ortuguês {E}uropeu},
  journal   = {Linguamática},
  year      = {2010},
  volume    = {2},
  number    = {3},
  pages     = {67--72},
  month     = {December},
  issn      = {1647-0818},
  url       = {http://linguamatica.com/index.php/linguamatica/article/download/80/108},
  irreditor = {Alberto Simões and José João Almeida and Xavier Gómez
               Guinovart},
  abstract  = {
      Neste trabalho apresentamos o projecto Procura-PALavras (P-PAL)
      cujo principal objectivo é desenvolver uma ferramenta
      electrónica que disponibilize informação sobre índices
      psicolinguísticos objectivos e subjectivos de palavras do
      Português Europeu (PE). O P-PAL será disponibilizado
      gratuitamente à comunidade científica num formato amigável a
      partir de um sítio na Internet a construir para o efeito. Ao
      utilizar o P-PAL, o investigador poderá fazer uma utilização
      personalizada do programa ao seleccionar, da ampla variedade de
      análises oferecidas, os índices que se adequam aos propósitos da
      sua investigação e numa dupla funcionalidade de utilização:
      pedir ao programa para analisar listas de palavras previamente
      constituídas nos índices considerados relevantes para a
      investigação ou para obter listas de palavras que obedeçam aos
      parâmetros definidos. O P-PAL assume-se assim como uma
      ferramenta fundamental à promoção e internacionalização da
      investigação em Portugal.
  }
}

%%=============================================== 2011


@inproceedings{drury-torgo-almeida:2011:ROBUS,
  title     = {Guided Self Training for Sentiment Classification},
  author    = {Drury, Brett  and  Torgo, Luis  and  J.J. Almeida},
  booktitle = {Proceedings of Workshop on Robust Unsupervised and Semisupervised
Methods in Natural Language Processing},
  address   = {Hissar, Bulgaria},
  year      = {2011},
  month     = {September},
  pages     = {9--16},
  url       = {http://www.aclweb.org/anthology/W11-3902},
}

@inproceedings{drury1,
  author    = {Brett Drury and Luis Torgo and J.J. Almeida},
  title     = {Classifying News Stories to Estimate the Direction of a Stock Market Index},
  booktitle = {Third Workshop on Intelligent Systems and Applications (WISA)},
  year      = 2011,
  location  = {Chaves},
  pages     = {1--4},
}


@inproceedings{drury2,
  author    = {Brett Drury and J.J. Almeida and Helena Morais},
  title     = {{Magellan}: An Adaptive Ontology Driven ``breaking Financial News''
               Recommender},
  booktitle = {CISTI-2011},
  year      = 2011,
  location  = {Chaves},
}

@inproceedings{drury3,
  author    = {Brett Drury and J.J. Almeida and Helena Morais},
  title     = {An Error Correction Methodology for Time Dependent Ontologies},
  booktitle = {{CAiSE} Workshops (ONTOSE)},
  editor    = {Camille Salinesi and Oscar Pastor},
  publisher = {Springer},
  series    = {Lecture Notes in Business Information Processing},
  volume    = {83},
  part      = {8},
  year      = {2011},
  pages     = {501--512},
  isbn      = {978-3-642-22055-5},
  doi       = {10.1007/978-3-642-22056-2_52},
  ee        = {http://dx.doi.org/10.1007/978-3-642-22056-2_52},
}


@inproceedings{nuno1,
  author    = {Nuno Carvalho and Alberto Simões and J.J. Almeida},
  title     = {{OML}: A Scripting Approach For Manipulating Ontologies},
  booktitle = {CISTI-2011},
  location  = {Chaves},
  year      = 2011,
}

@inproceedings{corta2011-pftl,
  title     = {{PFTL}: A Systematic Approach For Describing Filesystem Tree
Processors},
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida and
Pedro Rangel Henriques and Maria João Varanda Pereira},
  booktitle = {INForum'11 --- Simpósio de Informática (CoRTA2011 track)},
  editor    = {Raul Barbosa and Luis Caires},
  publisher = {Dep. de Eng. Informática da Universidade de Coimbra},
  address   = {Coimbra, Portugal},
  month     = {Setembro},
  year      = {2011},
  pages     = {222--233},
  isbn      = {978-989-96001-5-7},
  language  = {EN},
  pdf       = {http://ambs.perl-hackers.net/publications/corta2011-pftl.pdf},
  abstract  = {  Today, most
 developers prefer to store information in databases. But
  plain filesystems were used for years, and are still used, to store
  information, commonly in files of heterogeneous formats that are
  organized in directory trees. This approach is a very flexible and
  natural way to create hierarchical organized structures of
  documents.

  We can devise a formal notation to describe a filesystem tree structure,
  similar to a grammar, assuming that filenames can be considered terminal
  symbols, and directory names non-terminal symbols. This specification
  would allow to derive correct language sentences (combination of terminal
  symbols) and to associate semantic actions, that can produce arbitrary
  side effects, to each valid sentence, just as we do in common parser
  generation tools. These specifications can be used to systematically
  process files in directory trees, and the final result depends on the
  semantic actions associated with each production rule.

  In this paper we revamped an old idea of using a domain specific
  language to implement these specifications similar to context free
  grammars. And introduce some examples of applications that can be
  built using this approach.
  },
}

@InProceedings{corta2011-oml,
  author    = {Nuno Carvalho and José João Almeida and Alberto Simões},
  title     = {Weaving {OML} in a General Purpose Programming Language},
  booktitle = {INForum'11 --- Simpósio de Informática (CoRTA2011 track)},
  editor    = {Raul Barbosa and Luis Caires},
  publisher = {Dep. de Eng. Informática da Universidade de Coimbra},
  isbn      = {978-989-96001-5-7},
  address   = {Coimbra, Portugal},
  pdf       = {http://ambs.perl-hackers.net/publications/corta2011-oml.pdf},
  year      = {2011},
  month     = {Setembro},
  language  = {EN},
  pages     = {184--197},
  abstract  = {
  Most existing programming languages can be categorized as general
  purpose programming languages, meaning that they can be used to
  implement solutions for any given domain. They are not, in any way,
  optimized for a specific set of problems. In contrast, Domain
  Specific Languages (DSL) are used to solve specific problems in a
  well defined domain. DSL are optimized to a particular set of
  problems, but they lack support for a wide range of operations that
  are required when dealing with real world problems. So, in a
  perfect world, we would like to implement applications using a
  general purpose programming language, but use a set of different DSL
  to handle specific domains' tasks.

  In this paper we describe a DSL named Ontology Manipulation Language
  (OML), designed to describe operations over
  ontologies. Programs can be written
  using only the OML syntax and be executed independently. OML syntax
  was designed to deal with ontologies and the language itself is
  optimized to perform these tasks, which means that other relatively
  simpler tasks can not be easily done. To overcome this challenge a
  mechanism was developed so that you can weave small snippets of OML code
  inside Perl programs, meaning we have the power of OML to manipulate
  ontologies and, at the same time, all the paraphernalia of modules
  that Perl offers to handle everything else.
  },
}


@InProceedings{wims2011,
  title          = {Identification of fine grained feature based event and sentiment
               phrases from business news stories},
  author         = {Brett Drury and J.J. Almeida},
  editor         = {Rajendra Akerkar},
  booktitle      = {WIMS},
  booktitle_full = {Proceedings of the International Conference on Web
Intelligence, Mining and Semantics, WIMS 2011, Sogndal, Norway, May 25
               - 27, 2011},
  publisher      = {ACM},
  year           = {2011},
  pages          = {27--34},
  isbn           = {978-1-4503-0148-0},
  ee             = {http://doi.acm.org/10.1145/1988688.1988720},
  bibsource      = {DBLP, http://dblp.uni-trier.de},
}

@inproceedings{sepln:bookcleaner,
  author    = {Santos, André and José João Almeida},
  title     = {{Text::Perfide::BookCleaner}, a Perl module to
               clean and normalize plain text books},
  booktitle = {Actas del XXVII Congreso de la Sociedad Española
               para el Procesamiento del Lenguaje Natural},
  year      = 2011,
  pages     = {433--441},
  location  = {Huelva, 5 - 7 Set},
  url       = {http://natura.di.uminho.pt/~jj/pln/sepln2011-boolcleaner.pdf},
}

@article{drury4,
  author      = {Brett Drury and J.J. Almeida and Helena Morais},
  title       = {Construction and maintenance of a fuzzy temporal ontology
                 from news stories},
  journal     = {IJMSO},
  journalfull = {International Journal of Metadata, Semantics and Ontologies},
  volume      = {6},
  number      = {3/4},
  year        = {2011},
  pages       = {219--233},
  doi         = {10.1504/IJMSO.2011.048028},
}

@InProceedings{xml2pm-xata2011,
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida},
  title     = {xml2pm: A Tool for Automatic Creation of Object Definitions Based
               on {XML} Instances},
  booktitle = {{XATA 2011} --- 9ª Conferência Nacional em XML, Aplicações e
               Tecnologias Aplicadas},
  year      = {2011},
  pages     = {103--114},
  isbn      = {978-989-96863-1-1},
  editor    = {Alberto Simões},
  month     = {1--2 June},
  address   = {Vila do Conde, Portugal},
  pdf       = {http://ambs.perl-hackers.net/publications/xml2pm-xata2011.pdf},
  lang      = {EN},
  abstract  = {
The eXtensible Mark-up Language (XML) is probably one of the
most popular markup languages available today. It is very typical to find all
kind of services or programs representing data in this format. This situation
is even more common in web development environments or Service Oriented
Architectures (SOA), where data flows from one service to another, being
consumed and produced by an heterogeneous set of applications, which sole
requirement is to understand XML.

This workflow of data represented in XML implies some tasks that applications
have to perform if they are required to consume or produce information: the
task of parsing an XML document, giving specific semantics to the information
parsed, and the task of producing an XML document.

Our main goal is to create object definitions that can analyze an XML document
and automatically create an object definition that can be used abstractly by
the application. These objects are able to parse the XML document and gather
all the data required to mimic all the information present in the document.
This paper introduces xml2pm, a simple tool that can inspect the structure of
an XML document and create an object definition (a Perl module) that stores
the same information present in the original document, but as a runtime
object. We also introduce a simple case of how this approach allows the
creation of applications based on Web Services in an elegant and simple way.
  }
}


@article{drury5,
  author       = {Brett Drury and Luis Torgo and J.J. Almeida},
  title        = {Classifying News Stories with a Constrained Learning Strategy
                  to Estimate the Direction of a Market Index},
  journal      = {IJCSA},
  journal_full = {International Journal of Computer Science and Applications},
  volume       = {9},
  number       = {1},
  year         = {2012},
  pages        = {1--22},
  url          = {http://www.tmrfindia.org/ijcsa/v9i11.pdf},
  bibsource    = {DBLP, http://dblp.uni-trier.de}
}

% da2012 is a paper in an edited Springer proceedings volume (it carries
% editor, publisher and address), so it is typed as inproceedings: the volume
% title goes in booktitle and the Lecture Notes series in series.
% NOTE(review): confirm the series name against the published volume.
@inproceedings{da2012,
  author    = {Alberto Simões and Álvaro Iriarte Sanromán and José João Almeida},
  title     = {{Dicionário-Aberto} -- A Source of Resources for the
               {Portuguese} Language Processing},
  booktitle = {Computational Processing of the Portuguese Language},
  editor    = {Helena Caseli and Aline Villavicencio and António Teixeira
               and Fernando Perdigão},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {7243},
  publisher = {Springer},
  address   = {Coimbra, Portugal},
  month     = {April},
  year      = 2012,
  pages     = {121--127},
}


@InProceedings{LREC12.967,
  author    = {André Santos and José João Almeida and Nuno Carvalho},
  title     = {Structural alignment of plain text books},
  booktitle = {Proceedings of the Eighth International Conference on Language
               Resources and Evaluation (LREC'12)},
  editor    = {Nicoletta Calzolari and others},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Istanbul, Turkey},
  month     = may,
  date      = {23-25},
  year      = {2012},
  isbn      = {978-2-9517408-7-7},
  language  = {english}
}

@InProceedings{LREC12.611,
  author    = {Brett Drury and José João Almeida},
  title     = {The {Minho} Quotation Resource},
  booktitle = {Proceedings of the Eighth International Conference on Language
               Resources and Evaluation (LREC'12)},
  editor    = {Nicoletta Calzolari and others},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Istanbul, Turkey},
  month     = may,
  date      = {23-25},
  year      = {2012},
  isbn      = {978-2-9517408-7-7},
  language  = {english}
}

% NOTE(review): isbn/issn and the DOI prefix of CAPH12a follow the sibling
% entries for the same SLATE 2012 volume (OASIcs vol. 21). The DOI suffix .239
% is inferred from the first page, as in those siblings -- confirm it resolves.
@InProceedings{CAPH12a,
  author    = {Nuno Ramos Carvalho and José João Almeida and Maria
               João Varanda Pereira and Pedro Rangel Henriques},
  title     = {Probabilistic {SynSet} Based Concept Location},
  booktitle = {SLATe'12 --- Symposium on Languages, Applications and Technologies},
  irreditor = {Alberto Simões and Ricardo Queirós and Daniela da Cruz},
  series    = {OASIcs -- Open Access Series in Informatics},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik, Dagstuhl
               Publishing, Germany},
  year      = {2012},
  month     = {June},
  volume    = {21},
  pages     = {239--253},
  isbn      = {978-3-939897-40-8},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.SLATE.2012.239},
  abstract  = {Concept location is a common task in program comprehension
  techniques, essential in many approaches used for software care and
  software evolution. An important goal of this process is to discover
  a mapping between source code and human oriented concepts.

  Although programs are written in a strict and formal language, natural
  language terms and sentences like identifiers (variables or functions
  names), constant strings or comments, can still be found embedded in
  programs. Using terminology concepts and natural language processing
  techniques these terms can be exploited to discover clues about which
  real world concepts source code is addressing.

  This work extends symbol tables built by compilers with ontology
  driven constructs, extends synonym sets defined by linguistics, with
  automatically created Probabilistic SynSets from software
  domain parallel corpora. And using a relational algebra, creates
  semantic bridges between program elements and human oriented concepts,
  to enhance concept location tasks.}
}


@article{wikiscore,
  author        = {J.J. Almeida and Nuno Ramos Carvalho and José Nuno Oliveira},
  title         = {{Wiki::Score} A collaborative environment for music transcription and
                   publishing},
  journal       = {Information, Services and Use (ISU)},
  journal_small = {ISU},
  volume        = {31},
  number        = {3-4/2011},
  year          = {2012},
  pages         = {177--187},
  doi           = {10.3233/ISU-2012-0647},
  ee            = {DOI 10.3233/ISU-2012-0647},
  publisher     = {IOS Press},
  ISSN          = {0167-5265 (Print) 1875-8789 (Online)},
  comment       = {elpub 2012},
}

@InProceedings{flapp,
  author    = {Alberto Simões and Nuno Ramos Carvalho and José João Almeida},
  title     = {Generating {flex} Lexical Scanners for {Perl} {Parse::Yapp}},
  booktitle = {1st Symposium on Languages, Applications and Technologies},
  pages     = {41--50},
  series    = {OpenAccess Series in Informatics (OASIcs)},
  ISBN      = {978-3-939897-40-8},
  ISSN      = {2190-6807},
  idx       = {DBLP},
  year      = {2012},
  volume    = {21},
  irreditor = {Alberto Simões and Ricardo Queirós and Daniela da Cruz},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  address   = {Dagstuhl, Germany},
  url       = {http://drops.dagstuhl.de/opus/volltexte/2012/3513},
  doi       = {10.4230/OASIcs.SLATE.2012.41},
  abstract  = {
Perl is known for its versatile regular expressions. Nevertheless, using Perl regular
expressions for creating fast lexical analyzer is not easy. As an alternative, the authors
defend the automated generation of the lexical analyzer in a well known fast application
(flex) based on a simple Perl definition in the syntactic analyzer. In this paper we
extend the syntax used by Parse::Yapp, one of the most used parser generators for Perl,
making the automatic generation of flex lexical scanners possible. We explain how this is
performed and conclude with some benchmarks that show the relevance of the approach.
}
}


@inproceedings{DBLP:conf/slate/DruryA12,
  author    = {Brett Drury and José João Almeida},
  title     = {Predicting Market Direction from Direct Speech by Business
               Leaders},
  booktitle = {SLATE},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  series    = {OASICS},
  year      = {2012},
  volume    = {21},
  pages     = {163--172},
  doi       = {10.4230/OASIcs.SLATE.2012.163},
  irreditor = {Alberto Sim{\~o}es and
               Ricardo Queir{\'o}s and
               Daniela Carneiro da Cruz},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}


%%=================================== 2013

@inproceedings{ptd2013,
  author    = {Alberto Simões and José João Almeida and Nuno Ramos Carvalho},
  title     = {Defining a Probabilistic Translation Dictionaries Algebra},
  booktitle = {XVI Portuguese Conference on Artificial Intelligence - EPIA},
  year      = 2013,
  month     = {September},
  pages     = {444--455},
  irreditor = {Luís Correia and Luís Paulo Reis and José Cascalho and Luís
               Gomes and Hélia Guerra and Pedro Cardoso},
  address   = {Angra do Heroismo, Azores},
  url       = {http://natura.di.uminho.pt/~jj/bib/ptd-algebra.pdf},
}


@InProceedings{algarve-cross2013,
  author    = { Nuno Ramos Carvalho and Alberto Simões and José João Almeida},
  title     = {Open Source Software Documentation Mining for Quality Assessment},
  booktitle = { Advances in Information Systems and Technologies},
  editor    = { Rocha, Álvaro and Correia, Ana Maria and Wilson, Tom and Stroetmann,
Karl A.},
  series    = { Advances in Intelligent Systems and Computing},
  volume    = { 206},
  publisher = { Springer Berlin Heidelberg},
  year      = { 2013},
  pages     = {785--794},
  isbn      = { 978-3-642-36980-3},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/wcist2012-dmoss.pdf },
  abstract  = { Besides source code, the fundamental source of information about Open Source
Software lies in documentation, and other non source code files, like README,
INSTALL, or HowTo files, commonly available in the software ecosystem. These
documents, written in natural language, provide valuable information during the
software development stage, but also in future maintenance and evolution tasks.
DMOSS is a toolkit designed to systematically assess the quality of non source
code text found in software packages. The toolkit handles a package as an
attribute tree, and performs several tree traverse algorithms through a set of
plugins, specialized in retrieving specific metrics from text, gathering
information about the software. These metrics are later used to infer knowledge
about the software, and composed together to build reports that assess the
quality of specific features of the software. This paper discusses the
motivations for this work, continues with a description of the toolkit
implementation and design goals. Follows an example of its usage to process a
software package, and the produced report. Finally some final remarks and
trends for future work are presented.},
}


@inproceedings{algarve2013,
  author    = {Alberto Simões and Anália Lourenço and José João Almeida},
  title     = {Evaluating Web Site Structure Based on Navigation Profiles and Site Topology},
  booktitle = {Advances in Information Systems and Technologies},
  editor    = {Rocha, Álvaro and Correia, Ana Maria and Wilson, Tom and Stroetmann,
               Karl A.},
  series    = {Advances in Intelligent Systems and Computing},
  volume    = {206},
  publisher = {Springer Berlin Heidelberg},
  year      = {2013},
  pages     = {305--311},
  isbn      = {978-3-642-36980-3},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/wcist2012-webtopology.pdf},
  abstract  = {This work aims at pointing out the benefits of a topology-oriented
wide scope, but differentiated, profile analysis. The goal was to conciliate
advanced common website usage profiling techniques with the analysis of the
website's topology information, outputting valuable knowledge in an intuitive
and comprehensible way. Server load balancing, crawler activity evaluation and
Web site restructuring are the primary analysis concerns and, in this regard,
experiments over six month data of a real-world Web site were considered
successful.},
}

@inproceedings{Passarola2013,
  author    = {José João Almeida and Isabel Araújo and Irene Brito and Nuno Carvalho and
               Gaspar J. Machado and Rui M. S. Pereira and Georgi Smirnov},
  title     = {{PASSAROLA}: High-Order Exercise Generation},
  booktitle = {CISTI-2013},
  year      = 2013,
  location  = {Lisboa},
  pages     = {763--768},
  url       = {http://natura.di.uminho.pt/~jj/bib/passarola-cisti2013.pdf},
  abstract  = {In order to be robust and achieve multi-domain
coverage, exercise generation systems usually work with answers
of simple types (e.g. multiple-choice, Boolean, integer, or file
comparison). In this paper we describe an exercise generation
system PASSAROLA, a simple, yet powerful, language that anyone
with no computer science background, can use to develop
exercises, that include a collection of heterogeneous objects, and
allows the usage of complex elements. Its main characteristic
features are the use of simple reusable templates, simple and rich
types, rich notation and syntax (LaTeX based) for questions,
solutions, and answers, transformations and calculations,
external calculators.},
}

@inproceedings{ticames2013,
  author    = {José João Almeida and Isabel Araújo and Irene Brito and Nuno Carvalho and
               Gaspar J. Machado and Rui M. S. Pereira and Georgi Smirnov},
  title     = {Math exercise generation and smart assessment},
  booktitle = {Workshop of TICAMES (Information and Communication Technology in
               Higher Education: Learning Mathematics), CISTI-2013},
  year      = 2013,
  location  = {Lisboa},
  pages     = {1014--1019},
  url       = {http://natura.di.uminho.pt/~jj/bib/passarola-ticames2013.pdf},
  abstract  = {In this paper we concentrate on the field of
mathematics education where the aim is to generate exercises
going beyond those with answers of simple types (e.g. multiple-choice,
Boolean, integer, or file comparison). We present three
examples from introductory college mathematics and emphasize
the key points that should be taken into account in order to
develop a ``well-posed'' exercise together with its verification. All
the presented examples were implemented in the system},
}

@inproceedings{crossportal,
  author       = {Pedro Martins and
                  Nuno Ramos Carvalho and
                  João Paulo Fernandes and
                  José João Almeida and
                  João Saraiva},
  title        = {A Framework for Modular and Customizable Software Analysis},
  booktitle    = {ICCSA (2)},
  irrbooktitle = {Computational Science and Its Applications - ICCSA 2013
               - 13th International Conference, Ho Chi Minh City, Vietnam,
               June 24-27, 2013, Proceedings, Part II},
  editor       = {Beniamino Murgante and others},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  volume       = {7972},
  year         = {2013},
  pages        = {443--458},
  isbn         = {978-3-642-39642-7},
  doi          = {10.1007/978-3-642-39643-4_32},
  ee           = {http://dx.doi.org/10.1007/978-3-642-39643-4},
  offcrossref  = {DBLP:conf/iccsa/2013-2},
  bibsource    = {DBLP, http://dblp.uni-trier.de},
}

@inproceedings{icaicte13,
  author    = {José João Almeida and Isabel Araújo and Irene Brito and Nuno Carvalho and Gaspar J.
               Machado and Rui M. S. Pereira and Georgi Smirnov},
  title     = {Exercise generation with the system {Passarola}},
  booktitle = {ICAICTE-13, Advances in Intelligent Systems Research},
  year      = {2013},
  isbn      = {978-90786-77-79-6},
  issn      = {1951-6851},
  doi       = {10.2991/icaicte.2013.64},
  url       = {http://natura.di.uminho.pt/~jj/bib/ecaicte2013.pdf},
  keywords  = {Passarola, exercise generation system, self-regulating study},
  abstract  = {A robust multi-domain coverage exercise generation system
usually works with answers of simple types (e.g. multiple-choice,
Boolean, integer, or file comparison). In this paper we describe
Passarola, a simple, yet powerful, exercise generation system and its
language that anyone with no computer science background can use to
develop exercises. It may include a collection of heterogeneous objects
allowing the usage of complex elements. Its main characteristics are the
use of simple reusable templates, simple and rich types, and rich notation
and syntax (LaTeX based) for questions, solutions, and answers.},
}

%% SLATE 2013 paper (OASIcs vol. 29).  s_booktitle and irreditor are
%% non-standard field names, which standard styles silently ignore.
@inproceedings{slate/AzevedoA13,
  author      = {Bruno M. Azevedo and
                 José João Almeida},
  title       = {ABC with a UNIX Flavor},
  s_booktitle = {SLATE},
  booktitle   = {2nd Symposium on Languages, Applications and Technologies,
                 SLATE 2013, June 20-21, 2013 - Porto, Portugal},
  year        = {2013},
  pages       = {203--218},
  doi         = {10.4230/OASIcs.SLATE.2013.203},
  url         = {http://drops.dagstuhl.de/opus/volltexte/2013/4039/pdf/14.pdf},
  irreditor   = {José Paulo Leal and
                 Ricardo Rocha and
                 Alberto Simões},
  publisher   = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  series      = {OASICS},
  volume      = {29},
  isbn        = {978-3-939897-52-1},
  bibsource   = {DBLP, http://dblp.uni-trier.de},
  abstract    = {
ABC is a simple, yet powerful, textual musical notation.
This paper presents ABC::DT, a rule-based domain-specific
language (Perl embedded), designed to simplify the
creation of ABC processing tools. Inspired by the Unix philosophy,
those tools intend to be simple and compositional in a Unix filters' way.
From ABC::DT's rules we obtain an ABC processing tool whose main
algorithm follows a traditional compiler architecture, thus consisting of
three stages:
1) ABC parser (based on abcmtops parser),
2) ABC semantic transformation (associated with ABC attributes),
3) output generation (either a user defined or system provided ABC generator).
  },
}

%% ESCOLEX lexical database paper; no volume/number recorded (the URL points
%% to an in-press copy).
@article{escolex2013,
  author   = {Soares, Ana Paula and José Carlos Medeiros and Alberto Simões
              and João Machado and Ana Costa and Álvaro Iriarte and
              José João Almeida and Ana P. Pinheiro and Montserrat Comesaña},
  title    = {{Escolex}: A grade-level lexical database from {European}
              {Portuguese} elementary to middle school textbooks},
  journal  = {Behavior Research Methods},
  year     = 2013,
  pages    = {1--14},
  url      = {http://p-pal.di.uminho.pt/static/files/db/Soares_et_al.__in_press_ESCOLEX.pdf},
  abstract = {
In this article, we introduce ESCOLEX, the first European Portuguese children's
lexical database with grade-level-adjusted word frequency statistics. Computed
from a 3.2-million-word corpus, ESCOLEX provides 48,381 word forms extracted
from 171 elementary and middle school textbooks for 6- to 11-year-old children
attending the first six grades in the Portuguese educational system. Like other
children's grade-level databases, ESCOLEX provides four frequency indices for
each grade: overall word frequency (F), index of dispersion across the selected
textbooks (D), estimated frequency per million words (U), and standard
frequency index (SFI). It also provides a new measure, contextual diversity
(CD). In addition, the number of letters in the word and its part(s) of speech,
number of syllables, syllable structure, and adult frequencies taken from P-PAL
(a European Portuguese corpus-based lexical database) are provided. ESCOLEX
will be a useful tool both for researchers interested in language processing
and development and for professionals in need of verbal materials adjusted to
children's developmental stages. ESCOLEX can be downloaded along with this
article or from http://p-pal.di.uminho.pt/about/databases.
  },
}


%% Per-fide corpus project, XIV Colóquio de Outono (Universidade do Minho).
@inproceedings{coloquiosOutono2013,
  author    = {José João Almeida and
               Sílvia Araújo and
               Idalete Dias and
               Ana Correio},
  title     = {{Per-fide}: Projecto de compilação de um corpus multilingue},
  editor    = {Ana Gabriela Macedo and Carlos Mendes de Sousa and Vitor Moura},
  booktitle = {Humanidades: Novos Paradigmas do Conhecimento e da Investigação,
               XIV Colóquio de Outono},
  publisher = {húmus, Universidade do Minho},
  pages     = {323--339},
  year      = {2013},
}

%%=================================== 2014

%% Chapter 9 of the edited volume "Working with Portuguese Corpora".
@incollection{sardinha2014,
    author    = {José João Almeida and Sílvia Araújo and Nuno Carvalho and
                 Idalete Dias and Ana Oliveira and André Santos and
                 Alberto Simões},
    title     = {The {Per-Fide} Corpus: A New Resource for Corpus-Based Terminology,
                 Contrastive Linguistics and Translation Studies},
    booktitle = {Working with Portuguese Corpora},
    editor    = {Tony Berber Sardinha and Telma São-Bento Ferreira},
    publisher = {Bloomsbury Publishing},
    year      = {2014},
    month     = apr,
    chapter   = {9},
    pages     = {177--200},
    isbn      = {978-1441190505},
    url       = {http://ambs.perl-hackers.net/publications/perfide_ch9_sardinha.pdf},
}

%% SciELO record of the P-Pal frequency paper; the entry ppal2014 describes the
%% same article (same journal, volume and pages).  Both keys are kept so that
%% existing citations keep resolving.
@article{SOARES2014,
   author       = {Soares, Ana Paula and Iriarte, Álvaro and Almeida, José João
                   and Simões, Alberto and Costa, Ana and França, Patricia and
                   Machado, João and Comesaña, Montserrat},
   title        = {{Procura-PALavras (P-Pal)}: uma nova medida de frequência lexical do português europeu contemporâneo},
   journal      = {Psicologia: Reflexão e Crítica},
   volume       = {27},
   year         = {2014},
   month        = mar,
   pages        = {110--123},
   publisher    = {scielo},
   ISSN         = {0102-7972},
   language     = {pt},
   url          = {http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0102-79722014000100013&nrm=iso},
   off_crossref = {10.1590/S0102-79722014000100013},
}

%% Bilingual-title record of the P-PAL frequency paper; the entry SOARES2014
%% describes the same article.
@article{ppal2014,
    author  = {Soares, A. P. and Iriarte, A. and Almeida, J. J.
               and Simões, A. and Costa, A. and França, P.
               and Machado, J. and Comesaña, M.},
    title   = {{Procura-PALavras (P-PAL)}: A new measure
               of word frequency for contemporary {European} {Portuguese} |
               {Procura-PALavras (P-PAL)}: Uma nova medida de frequência
               lexical do {Português} {Europeu} contemporâneo},
    journal = {Psicologia: Reflexao e Critica},
    year    = {2014},
    volume  = {27},
    number  = {1},
    pages   = {110--123},
    doi     = {10.1590/S0102-79722014000100013},
    EID     = {2-s2.0-84902185491},
}

%% ICCSA 2014 conference paper, recorded as a journal article in LNCS
%% (per the annote field).  The conference name is parked in the non-standard
%% offbooktitle field; the part number is carried in note so that volume
%% stays a plain number.
@article{conclave-iccsa2104,
  author       = {Nuno Ramos Carvalho and José João Almeida and Maria
                  João Varanda Pereira and Pedro Rangel Henriques},
  title        = {{Conclave}: Ontology-driven measurement of semantic relatedness
                  between source code elements and problem domain concepts},
  journal      = {Lecture Notes in Computer Science (including subseries Lecture
                  Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  offbooktitle = {14th International Conference on Computational Science and its
                  Applications, ICCSA 2014; Guimaraes; Portugal},
  volume       = {8584},
  pages        = {116--131},
  year         = {2014},
  note         = {ICCSA 2014, Part VI},
  publisher    = {Springer Verlag},
  ISBN         = {978-3-319-09152-5},
  doi          = {10.1007/978-3-319-09153-2_9},
  annote       = {Document Type: Conference Paper; SCOPUS}
}

%% DMOSS toolkit paper, Computer Science and Information Systems 11(4).
@article{comsys-dmoss,
  author   = {Carvalho, N. R. and Simões, A. and Almeida, J. J.},
  title    = {{DMOSS}: Open Source Software Documentation Assessment},
  journal  = {Computer Science and Information Systems},
  volume   = 11,
  number   = 4,
  year     = 2014,
  pages    = {1191--1207},
  url      = {http://www.comsis.org/archive.php?show=pprwc110-1308},
  abstract = {Besides source code, the fundamental source of information
   about open source software lies in documentation, and other non source
   code files, like README, INSTALL, or How-To files, commonly available in
   the software ecosystem. These documents, written in natural language,
   provide valuable information during the software development stage,
   but also in future maintenance and evolution tasks. DMOSS is a toolkit
   designed to systematically assess the quality of non source code content
   found in software packages. The toolkit handles a package as an attribute
   tree, and performs several tree traverse algorithms through a set of
   plugins, specialized in retrieving specific metrics from text, gathering
   information about the software. These metrics are later used to infer
   knowledge about the software, and composed together to build reports
   that assess the quality of specific features. This paper discusses the
   motivations for this work, continues with a description of the toolkit
   implementation and design goals. This is followed by an example of its
   usage to process a software package, and the produced report.},
}

%% Early-access record (no volume or pages yet); the entry jss-CarvalhoAHP15 in
%% the 2015 section carries the same title and DOI with final volume and pages.
@article{jss-Carvalho2014,
  author   = {Nuno Ramos Carvalho and José João Almeida and Pedro Rangel Henriques
              and Maria João Varanda},
  title    = {From source code identifiers to natural language terms},
  journal  = {Journal of Systems and Software},
  year     = {2014},
  issn     = {0164-1212},
  doi      = {10.1016/j.jss.2014.10.013},
  url      = {http://www.sciencedirect.com/science/article/pii/S0164121214002179},
  keywords = {Program comprehension, Natural language processing, Identifier splitting},
  abstract = {Program comprehension techniques often explore program
    identifiers, to infer knowledge about programs. The relevance of source code
    identifiers as one relevant source of information about programs is already
    established in the literature, as well as their direct impact on future
    comprehension tasks. Most programming languages enforce some constrains on
    identifiers strings (e.g., white spaces or commas are not allowed). Also,
    programmers often use word combinations and abbreviations, to devise strings
    that represent single, or multiple, domain concepts in order to increase
    programming linguistic efficiency (convey more semantics writing less). These
    strings do not always use explicit marks to distinguish the terms used (e.g.,
    CamelCase or underscores), so techniques often referred as hard splitting are
    not enough. This paper introduces Lingua::IdSplitter a dictionary based
    algorithm for splitting and expanding strings that compose multi-term
    identifiers. It explores the use of general programming and abbreviations
    dictionaries, but also a custom dictionary automatically generated from
    software natural language content, prone to include application domain terms
    and specific abbreviations. This approach was applied to two software packages,
    written in C, achieving a f-measure of around 90\% for correctly splitting and
    expanding identifiers. A comparison with current state-of-the-art approaches is
    also presented.}
}

%% SLATE 2014 paper (OASIcs vol. 38); the editor list is parked in the
%% non-standard irreditor field.
@InProceedings{conclave-slate2014,
  author    = {Nuno Ramos Carvalho and José João Almeida and
               Maria João Varanda Pereira and Pedro Rangel Henriques},
  title     = {{Conclave}: Writing Programs to Understand Programs},
  booktitle = {3rd Symposium on Languages, Applications and Technologies},
  pages     = {19--34},
  series    = {OpenAccess Series in Informatics (OASIcs)},
  volume    = {38},
  year      = {2014},
  irreditor = {Maria João Varanda Pereira and José Paulo Leal and Alberto Simões},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  address   = {Dagstuhl, Germany},
  ISBN      = {978-3-939897-68-2},
  ISSN      = {2190-6807},
  URL       = {http://drops.dagstuhl.de/opus/volltexte/2014/4556},
  URN       = {urn:nbn:de:0030-drops-45561},
  doi       = {10.4230/OASIcs.SLATE.2014.19},
  annote    = {Keywords: software maintenance, software evolution, program comprehension, feature location, concept location, natural language processing}
}

%% SLATE 2014 paper (OASIcs vol. 38); the editor list is parked in the
%% non-standard irreditor field.
@inproceedings{DBLP:conf/slate/BritoA14,
  author    = {Rui Brito and José João Almeida},
  title     = {A Workflow Description Language to Orchestrate Multi-Lingual Resources},
  booktitle = {3rd Symposium on Languages, Applications and Technologies, {SLATE}
               2014, June 19-20, 2014 - Bragança, Portugal},
  irreditor = {Maria João Varanda Pereira and José Paulo Leal and Alberto Simões},
  series    = {{OASICS}},
  volume    = {38},
  pages     = {77--83},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  year      = {2014},
  isbn      = {978-3-939897-68-2},
  doi       = {10.4230/OASIcs.SLATE.2014.77},
  url       = {http://dx.doi.org/10.4230/OASIcs.SLATE.2014.77},
  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/slate/BritoA14},
}


%% SLATE 2014 paper (OASIcs vol. 38); the editor list is parked in the
%% non-standard irreditor field.
@inproceedings{DBLP:conf/slate/SimoesAB14,
  author    = {Alberto Simões and José João Almeida and Simon D. Byers},
  title     = {Language Identification: a Neural Network Approach},
  booktitle = {3rd Symposium on Languages, Applications and Technologies, {SLATE}
               2014, June 19-20, 2014 - Bragança, Portugal},
  irreditor = {Maria João Varanda Pereira and José Paulo Leal and Alberto Simões},
  series    = {{OASICS}},
  volume    = {38},
  pages     = {251--265},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  year      = {2014},
  isbn      = {978-3-939897-68-2},
  doi       = {10.4230/OASIcs.SLATE.2014.251},
  url       = {http://dx.doi.org/10.4230/OASIcs.SLATE.2014.251},
  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/slate/SimoesAB14},
}

%% SLATE 2014 paper (OASIcs vol. 38); the editor list is parked in the
%% non-standard irreditor field.
@inproceedings{DBLP:conf/slate/CarvalhoA14,
  author    = {Pedro Carvalho and José João Almeida},
  title     = {MLT-prealigner: a Tool for Multilingual Text Alignment},
  booktitle = {3rd Symposium on Languages, Applications and Technologies, {SLATE}
               2014, June 19-20, 2014 - Bragança, Portugal},
  irreditor = {Maria João Varanda Pereira and José Paulo Leal and Alberto Simões},
  series    = {{OASICS}},
  volume    = {38},
  pages     = {283--290},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  year      = {2014},
  isbn      = {978-3-939897-68-2},
  doi       = {10.4230/OASIcs.SLATE.2014.283},
  url       = {http://dx.doi.org/10.4230/OASIcs.SLATE.2014.283},
  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/slate/CarvalhoA14},
}


%% IberSpeech 2014 paper on annotated TMX parallel corpora.
@InProceedings{tmxa,
  author    = {Rui Brito and José João Almeida and Alberto Simões},
  title     = {Processing Annotated {TMX} Parallel Corpora},
  booktitle = {IberSpeech 2014 --- VIII Jornadas en Tecnologías del Habla and IV Iberian SLTech Workshop},
  year      = {2014},
  month     = nov,
  address   = {Las Palmas de Gran Canaria, Spain},
  pages     = {188--197},
  url       = {http://ambs.perl-hackers.net/publications/tmxa.pdf},
  abstract  = {In the later years the amount of freely available multilingual
      corpora has grown in an exponential way. Unfortunately the way these
      corpora are made available is very diverse, ranging from simple text
      files or specific XML schemas to supposedly standard formats like
      the XML Corpus Encoding Initiative, the Text Encoding Initiative, or
      even the Translation Memory Exchange formats.
      In this document we defend the usage of Translation Memory Exchange
      documents, but we enrich its structure in order to support the
      annotation of the documents with different information like lemmas,
      multi-words or entities.
      To support the adoption of the proposed formats, we present a set of
      tools to manipulate the different formats in an agile way.},
}

%%========================= 2015

%% Final (volume 100) record of the identifier-splitting paper; the entry
%% jss-Carvalho2014 is the earlier record with the same title and DOI.
@article{jss-CarvalhoAHP15,
  author    = {Nuno Ramos Carvalho and
               Jos{\'{e}} Jo{\~{a}}o Almeida and
               Pedro Rangel Henriques and
               Maria Jo{\~{a}}o Varanda Pereira},
  title     = {From source code identifiers to natural language terms},
  journal   = {Journal of Systems and Software},
  volume    = {100},
  pages     = {117--128},
  year      = {2015},
  url       = {http://dx.doi.org/10.1016/j.jss.2014.10.013},
  doi       = {10.1016/j.jss.2014.10.013},
  timestamp = {Mon, 22 Dec 2014 09:51:10 +0100},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/jss/CarvalhoAHP15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  keywords  = {Program comprehension, Natural language processing, Identifier splitting},
  abstract  = {Program comprehension techniques often explore program
    identifiers, to infer knowledge about programs. The relevance of source code
    identifiers as one relevant source of information about programs is already
    established in the literature, as well as their direct impact on future
    comprehension tasks. Most programming languages enforce some constrains on
    identifiers strings (e.g., white spaces or commas are not allowed). Also,
    programmers often use word combinations and abbreviations, to devise strings
    that represent single, or multiple, domain concepts in order to increase
    programming linguistic efficiency (convey more semantics writing less). These
    strings do not always use explicit marks to distinguish the terms used (e.g.,
    CamelCase or underscores), so techniques often referred as hard splitting are
    not enough. This paper introduces Lingua::IdSplitter a dictionary based
    algorithm for splitting and expanding strings that compose multi-term
    identifiers. It explores the use of general programming and abbreviations
    dictionaries, but also a custom dictionary automatically generated from
    software natural language content, prone to include application domain terms
    and specific abbreviations. This approach was applied to two software packages,
    written in C, achieving a f-measure of around 90\% for correctly splitting and
    expanding identifiers. A comparison with current state-of-the-art approaches is
    also presented.}
}

%% Advances in Intelligent Systems and Computing, vol. 353.
@article { acores-wordcist2015,
   author  = {Araújo, I. and Brito, I. and Machado, G. J. and Pereira, R. M. S.
              and Almeida, J. J. and Smirnov, G.},
   title   = {New algorithms for smart assessment of math exercises},
   journal = {Advances in Intelligent Systems and Computing},
   year    = {2015},
   volume  = {353},
   pages   = {1221--1230},
}

%% CISTI 2015 paper; titlept (non-standard field) holds the Portuguese title.
@inproceedings{ cisti-almeida2015,
   author    = {Araújo, I. and Smirnov, G. and Almeida, J. J.},
   title     = {{Gröbner} bases and mathematical exercises generation with nondetermined structure},
   titlept   = {Bases de {Gröbner} e geração de exercícios matemáticos com estrutura não determinada},
   booktitle = {2015 10th Iberian Conference on Information Systems and Technologies,
                CISTI 2015},
   year      = {2015},
   url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84943328958&partnerID=MN8TOARS},
}

%% Quarterly Journal of Experimental Psychology 68(4).
@article { subtitles2015,
   author  = {Soares, A. P. and Machado, J. and Costa, A. and Iriarte, Á.
              and Simões, A. and Almeida, J. J. and Comesaña, M. and Perea, M.},
   title   = {On the advantages of word frequency and contextual
              diversity measures extracted from subtitles: The case of {Portuguese}},
   journal = {Quarterly Journal of Experimental Psychology},
   year    = {2015},
   volume  = {68},
   number  = {4},
   pages   = {680--696},
}



%% Chapter in CCIS vol. 563; the editor list is parked in the non-standard
%% irreditor field.
@incollection{PULO:springer,
  author    = {Simões, Alberto and Almeida, José João},
  title     = {Experiments on Enlarging a Lexical Ontology},
  booktitle = {Languages, Applications and Technologies},
  irreditor = {Sierra-Rodríguez, José-Luis and Leal, José-Paulo and Simões, Alberto},
  series    = {Communications in Computer and Information Science},
  volume    = {563},
  pages     = {49--56},
  publisher = {Springer International Publishing},
  year      = {2015},
  isbn      = {978-3-319-27652-6},
  doi       = {10.1007/978-3-319-27653-3_5},
  language  = {English}
}

%%============================ 2016

%% LREC 2016 paper.  The role annotation "(Conference Chair)" was dropped from
%% the first editor because BibTeX would parse it as part of the name.
@InProceedings{SIMES16.1052,
  author    = {Alberto Simões and Xavier Gómez Guinovart and J. João Almeida},
  title     = {Enriching a {Portuguese} {WordNet} using Synonyms from a
               Monolingual Dictionary},
  booktitle = {Proceedings of the Ninth International Conference on
               Language Resources and Evaluation (LREC 2016)},
  year      = {2016},
  month     = may,
  date      = {23-28},
  address   = {Portoroz, Slovenia},
  editor    = {Nicoletta Calzolari and Khalid Choukri
               and Thierry Declerck and Marko Grobelnik and Bente Maegaard and
               Joseph Mariani and Asuncion Moreno and Jan Odijk and
               Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {978-2-9517408-9-1},
  language  = {english}
 }

%% SLATE'16 paper (OASIcs vol. 51).  offeditor, offaddress and offURL are
%% non-standard field names, which standard styles silently ignore.
@InProceedings{almeida_et_al2016,
  author     = {José João Almeida and Eliana Grande and Georgi Smirnov},
  title      = {Context-Free Grammars: Exercise Generation and Probabilistic
                Assessment},
  booktitle  = {5th Symposium on Languages, Applications and Technologies
                (SLATE'16)},
  pages      = {1--8},
  series     = {OpenAccess Series in Informatics (OASIcs)},
  volume     = {51},
  year       = {2016},
  offeditor  = {Marjan Mernik and José Paulo Leal and Hugo Gonçalo Oliveira},
  publisher  = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  offaddress = {Dagstuhl, Germany},
  offURL     = {http://drops.dagstuhl.de/opus/volltexte/2016/6015},
  ISBN       = {978-3-95977-006-4},
  ISSN       = {2190-6807},
  URN        = {urn:nbn:de:0030-drops-60159},
  doi        = {10.4230/OASIcs.SLATE.2016.10},
  annote     = {Keywords: Exercise generation, context-free grammars, assessment}
}

%% CISTI 2016 paper, recorded as an article with the conference series as journal.
@article{cisti2016,
  author  = {Araujo, C. and Henriques, P. R. and Martini, R. G. and Almeida, J. J.},
  title   = {Architectural approaches to build the museum of the person},
  journal = {Iberian Conference on Information Systems and Technologies, CISTI},
  year    = {2016},
  volume  = {2016-July},
  doi     = {10.1109/CISTI.2016.7521367}
}


%% WorldCIST'16 paper, Advances in Intelligent Systems and Computing vol. 445.
@article{exercise-composition2016,
   author  = {Araújo, I. and Almeida, J. J. and Smirnov, G.},
   title   = {Exercise composition: From environment properties to composed problems},
   journal = {Advances in Intelligent Systems and Computing},
   year    = {2016},
   volume  = {445},
   pages   = {235--244},
   doi     = {10.1007/978-3-319-31307-8_24},
   note    = {WorldCIST'16},
}


%% WorldCIST'16 paper, Advances in Intelligent Systems and Computing vol. 445.
@article{ontoMP2016,
   author  = {Martini, R. G. and Ara{\'u}jo, C. and Almeida, J. J. and Henriques, P. R.},
   title   = {{OntoMP}, an ontology to build the museum of the person},
   journal = {Advances in Intelligent Systems and Computing},
   year    = {2016},
   volume  = {445},
   pages   = {653--661},
   doi     = {10.1007/978-3-319-31307-8_67},
   note    = {WorldCIST'16},
}

%%============================ 2017

%% WorldCIST'17 paper on the Mgbeg exercise generator (AISC vol. 659).
@inproceedings { portosanto-worldcist2017,
   author    = {Almeida, J. J. and Grande, Eliana and Smirnov, G.},
   title     = {Exercise generation on language specification},
   booktitle = {Recent Advances in Information Systems and Technologies},
   series    = {Advances in Intelligent Systems and Computing},
   volume    = {659},
   year      = {2017},
   pages     = {277--286},
   note      = {WorldCIST'17},
   abstract  = {Exercise generation on language specification is a challenging
problem, because of the richness of the objects in the domain.
In this paper we discuss Mgbeg (Meta-Grammar-Based Exercise Generator) -- a
toolkit for exercise generation on context-free languages.
Mgbeg approach is based on a meta-grammar formalism and tool, used to define
a set of similar exercises.
Mgbeg is a simple attributed grammar used to describe the set of valid
exercise (and randomly generate one of them).
Each exercise typically contains several attributes calculated during the
generation steps: namely, one or more formal specification of the language
(context free grammar); the exercise statement; other information such as
examples, common mistakes, validation data, to be used in the construction
of the exercise statement, solution, and assessment steps.
Complementary the toolkit provides a grammar module, with functionality
for grammar comparison, sentence generation and recognition; a template
engine (to help in textual attributes calculation).
   },
}

%%========================= 2018

%% WorldCIST 2018 chapter; the entry DBLP:conf/worldcist/MartinsAHN18 records
%% the same paper (same title, volume and pages).  The editor list is parked in
%% the non-standard offeditor field.
@incollection{Martins2018a,
     author       = {Ricardo Martins and J. João Almeida
         and Pedro Rangel Henriques and Paulo Novais},
     title        = {Increasing authorship identification through emotional analysis},
     booktitle    = {Trends and Advances in Information Systems and Technologies, WorldCist2018},
     offeditor    = {Álvaro Rocha and Hojjat Adeli and Luís Paulo Reis and Sandra Costanzo},
     series       = {Advances in Intelligent Systems and Computing},
     publisher    = {Springer International Publishing},
     year         = 2018,
     volume       = 745,
     pages        = {763--772},
     edition      = 1,
     month        = mar,
     doi          = {10.1007/978-3-319-77703-0_76},
     isbn         = {978-3-319-77702-3}
}


%% IDEAL 2018, part 1 (LNCS 11314).
@inproceedings{DBLP:conf/ideal/MarcondesAN18,
  author    = {Francisco S. Marcondes and José João Almeida and Paulo Novais},
  title     = {Chatbot Theory - A Naïve and Elementary Theory for Dialogue
               Management},
  booktitle = {{IDEAL} {(1)}},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  volume    = {11314},
  year      = {2018},
  pages     = {374--384}
}

%% BRACIS 2018 paper.
@inproceedings{DBLP:conf/bracis/Martins0ANH18,
  author    = {Ricardo Martins and Marco Gomes and José João Almeida and
               Paulo Novais and Pedro Rangel Henriques},
  title     = {Hate Speech Classification in Social Media Using Emotional Analysis},
  booktitle = {{BRACIS}},
  publisher = {{IEEE} Computer Society},
  year      = {2018},
  pages     = {61--66}
}

%% DCAI 2018 paper (AISC vol. 800).
@inproceedings{DBLP:conf/dcai/MartinsAHN18,
  author    = {Ricardo Martins and José João Almeida and
               Pedro Rangel Henriques and Paulo Novais},
  title     = {Domain Identification Through Sentiment Analysis},
  booktitle = {{DCAI}},
  publisher = {Springer},
  series    = {Advances in Intelligent Systems and Computing},
  volume    = {800},
  year      = {2018},
  pages     = {276--283}
}

%% SLATE 2018 paper (OASIcs vol. 62).
@inproceedings{DBLP:conf/slate/MendesA18,
  author    = {Rui Mendes and José João Almeida},
  title     = {eOS: The Exercise Operating System},
  booktitle = {{SLATE}},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  series    = {{OASICS}},
  volume    = {62},
  year      = {2018},
  pages     = {5:1--5:13}
}

%% SLATE 2018 short paper (OASIcs vol. 62).
@inproceedings{DBLP:conf/slate/Almeida18,
  author    = {José João Almeida},
  title     = {Abcl: Abc music notation with rich chord support (Short Paper)},
  booktitle = {{SLATE}},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  series    = {{OASICS}},
  volume    = {62},
  year      = {2018},
  pages     = {8:1--8:8}
}

%% SLATE 2018 short paper (OASIcs vol. 62).
@inproceedings{DBLP:conf/slate/MartinsAHN18,
  author    = {Ricardo Martins and José João Almeida and
               Pedro Rangel Henriques and Paulo Novais},
  title     = {Predicting Performance Problems Through Emotional Analysis (Short
               Paper)},
  booktitle = {{SLATE}},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  series    = {{OASICS}},
  volume    = {62},
  year      = {2018},
  pages     = {19:1--19:9}
}

%% WebMedia 2018 paper.
@inproceedings{DBLP:conf/webmedia/MartinsANH18,
  author    = {Ricardo Martins and José João Almeida and
               Paulo Novais and Pedro Rangel Henriques},
  title     = {Creating a social media-based personal emotional lexicon},
  booktitle = {WebMedia},
  publisher = {{ACM}},
  year      = {2018},
  pages     = {261--264}
}

%% WorldCIST 2018, part 1 (AISC vol. 745); the entry Martins2018a records the
%% same paper with DOI and ISBN.
@inproceedings{DBLP:conf/worldcist/MartinsAHN18,
  author    = {Ricardo Martins and José João Almeida and
               Pedro Rangel Henriques and Paulo Novais},
  title     = {Increasing Authorship Identification Through Emotional Analysis},
  booktitle = {WorldCIST {(1)}},
  publisher = {Springer},
  series    = {Advances in Intelligent Systems and Computing},
  volume    = {745},
  year      = {2018},
  pages     = {763--772}
}


%%============================ 2019

%% Journal of Computer Languages vol. 51.  The year is taken from the 2019
%% section this entry sits in and from its citation key.
@article{ cola19,
  author    = {José João Almeida and
               Eliana Grande and
               Georgi Smirnov},
  title     = {On solving cycle-free context-free grammar equivalence problem using numerical analysis},
  journal   = {Journal of Computer Languages},
  publisher = {Elsevier},
  volume    = 51,
  year      = 2019,
  pages     = {48--56},
  Keywords  = {Formal languages, Context-free grammars, Automatic assessment},
  abstract  = {
In this paper we consider the problem of cycle-free context-free grammars equivalence. To every context-free
grammar there corresponds a system of formal equations. Formally applying the iteration method to this system
we obtain the grammar axiom in the form of a formal power series composed of the words generated by the
grammar "multiplied" by the respective ambiguities.
We define a transform that attributes a matrix meaning to the system of formal equations and to formal power
series: terminal symbols are substituted by matrices and formal sum and product are substituted by the matrix
ones. In order to effectively compute the sum of a matrix series we numerically solve the system of matrix
equations. We prove distinguishability theorems showing that if two formal power series generated by cycle-free
context-free grammars are different, then there exists a matrix substitution such that the sums of the respective
matrix series are different. Based on this result, we suggest a procedure that can resolve the problem of
equivalence of cycle-free context-free grammars in many practical cases.
The results obtained in this paper form a theoretical basis for algorithms oriented to automatic assessment of
students' answers in computer science. We present the respective algorithms. Then we compare our approach
with a simple heuristic method based on CYK algorithm and discuss the limitations of our method.
}
}




%% SLATE 2019 paper (OASIcs vol. 74), imported from Scopus.  The Scopus export
%% put the series name in journal and gave no booktitle, which an inproceedings
%% entry requires.
%% NOTE(review): the booktitle wording is derived from the DOI (SLATE.2019) and
%% from the symposium numbering used by the other SLATE entries in this file
%% (2nd in 2013, 3rd in 2014, 5th in 2016); confirm against the proceedings.
@inproceedings{Almeida2019,
  author        = {Almeida, J. J. and Mendes, R. C.},
  title         = {Hunting ancestors: A unified approach for discovering genealogical information},
  booktitle     = {8th Symposium on Languages, Applications and Technologies (SLATE 2019)},
  series        = {OpenAccess Series in Informatics},
  volume        = {74},
  year          = {2019},
  doi           = {10.4230/OASIcs.SLATE.2019.22},
  art_number    = {22},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85071097688&doi=10.4230%2fOASIcs.SLATE.2019.22&partnerID=40&md5=8e2f42806d411bdfa553dcfa27be17a9},
  abstract      = {This paper presents a unified approach for discovering
genealogical information. It presents a framework for storing information
concerning ancestors, locations, dates and documents. It also intends
to provide a framework that is able to perform inference concerning
dates by using constraints and for handling relations, locations and
sources. The DSL presented also aims to help users store information
from heterogeneous sources along with the evidence contained therein. ©
José J. Almeida and Rui C. Mendes.},
  document_type = {Conference Paper},
  source        = {Scopus},
}


%% eLex 2019 paper on the LeXmart framework; url2 (non-standard field) keeps
%% the Scopus record link next to the full-text url.
@InProceedings{elex.lexmart,
    author    = {Alberto Simões and Ana Salgado and Rute Costa and José João Almeida},
    title     = {{LeXmart}: A Smart Tool for Lexicographers },
    booktitle = {Electronic lexicography in the 21st century. Proceedings of the eLex 2019 conference},
    editor    = {Kosem, I. and Zingano Kuhn, T. and Correia, M. and
                 Ferreira, J. P. and Jansen, M. and Pereira, I. and
                 Kallas, J. and Jakubícek, M. and Krek, S. and Tiberius, C.},
    pages     = {453--466},
    year      = {2019},
    url       = {https://research.unl.pt/ws/portalfiles/portal/16378894/eLex_2019_25.pdf},
    url2      = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85075350281&partnerID=40&md5=c5171c547089e5728c1cec0d5c755df1},
    abstract  = {The digital era has brought some challenges to lexicographers,
but it has also brought new opportunities as part of the rise of
information technology and, more recently, the emergence of digital
humanities. This paper provides a description of LeXmart, the framework
that supports the digital development of the Portuguese Academy of
Sciences Dictionary. LeXmart is a smart tool framework to support
lexicographers' work that offers different types of tools, ranging from a
structural editor to a set of validation tools. Given that the dictionary
is stored in eXist-DB, LeXmart is developed on top of its ecosystem,
using W3C standard languages, and offering default functionalities
offered by eXist-DB, namely a RESTful API. © 2019 Lexical Computing CZ
s.r.o.. All rights reserved.},
}

% Journal article in Expert Systems; only the article number is recorded
% (no volume/issue in the source record).
% LaTeX specials in the abstract are escaped so that styles which typeset
% abstracts do not treat a percent sign as a comment start.
@ARTICLE{Martins2019,
  author        = {Martins, R. and Almeida, J.J. and Henriques, P. and Novais, P.},
  title         = {A sentiment analysis approach to increase authorship identification},
  journal       = {Expert Systems},
  year          = {2019},
  doi           = {10.1111/exsy.12469},
  art_number    = {e12469},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85074844787&doi=10.1111%2fexsy.12469&partnerID=40&md5=bb5b7acab849e47b90246393026a4ba4},
  abstract      = {Writing style is considered the manner in which an author
expresses his thoughts, influenced by language characteristics, period,
school, or nation. Often, this writing style can identify the author. One
of the most famous examples comes from 1914 in Portuguese literature. With
Fernando Pessoa and his heteronyms Alberto Caeiro, Álvaro de Campos,
and Ricardo Reis, who had completely different writing styles, led
people to believe that they were different individuals. Currently,
the discussion of authorship identification is more relevant because
of the considerable amount of widespread fake news in social media,
in which it is hard to identify who authored a text and even a simple
quote can impact the public image of an author, especially if these
texts or quotes are from politicians. This paper presents a process to
analyse the emotion contained in social media messages such as Facebook to
identify the author's emotional profile and use it to improve the ability
to predict the author of the message. Using preprocessing techniques,
lexicon-based approaches, and machine learning, we achieved an authorship
identification improvement of approximately 5\% in the whole dataset
and more than 50\% in specific authors when considering the emotional
profile on the writing style, thus increasing the ability to identify
the author of a text by considering only the author's emotional profile,
previously detected from prior texts. © 2019 John Wiley \& Sons, Ltd.},
  document_type = {Article},
  source        = {Scopus},
}

% Conference paper published in the Springer AISC series (volume 800).
% The series name used to sit in a journal field, which inproceedings
% styles ignore; it now lives in series and booktitle carries the volume title.
% NOTE(review): booktitle derived from the DOI book ISBN (978-3-319-94649-8);
% confirm against the publisher page.
@inproceedings{Martins2019276,
  author        = {Martins, R. and Almeida, J.J. and Henriques, P. and Novais, P.},
  title         = {Domain identification through sentiment analysis},
  booktitle     = {Distributed Computing and Artificial Intelligence, 15th International Conference},
  series        = {Advances in Intelligent Systems and Computing},
  publisher     = {Springer},
  year          = {2019},
  volume        = {800},
  pages         = {276--283},
  doi           = {10.1007/978-3-319-94649-8_33},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85049987273&doi=10.1007%2f978-3-319-94649-8_33&partnerID=40&md5=3fe3521d746330d391ee8ec0dd7bd4e9},
  abstract      = {When dealing with chatbots, domain identification is an
important feature to adapt the interactions between user and computer in
order to increase the reliability of the communication and, consequently,
the audience and decrease its rejection avoiding misunderstandings. In
order to adapt to different domains, the writing style will be different
for the same author. For example, the same person in the role of a
student writes to his professor in a different style than he does for
his brother. This article presents a process that uses sentiment analysis
to identify the average emotional profile of the communication scenario
where the conversation is done. Using Natural Language Processing and
Machine Learning techniques, it was possible to obtain an index of
96.21\% of correct classifications in the identification of where these
communications have occurred only analysing the emotional profile of
these texts. © Springer International Publishing AG, part of Springer
Nature 2019.},
  document_type = {Conference Paper},
  source        = {Scopus},
}


%%===== 2020
% SLATE 2020 paper (OASIcs volume 83, article A6).
% The proceedings title goes in booktitle (required for inproceedings) and
% the OASIcs series name in series, instead of an ignored journal field.
@inproceedings{Silva2020,
  author        = {Silva, Pedro and Almeida, J.J.},
  title         = {{Musikla}: Language for generating musical events},
  booktitle     = {9th Symposium on Languages, Applications and Technologies ({SLATE} 2020)},
  series        = {OpenAccess Series in Informatics},
  publisher     = {Schloss Dagstuhl -- Leibniz-Zentrum für Informatik},
  year          = {2020},
  volume        = {83},
  doi           = {10.4230/OASIcs.SLATE.2020.6},
  art_number    = {A6},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85091704838&doi=10.4230%2fOASIcs.SLATE.2020.6&partnerID=40&md5=1c450e4e7bb940f5855eafaedb4ccba3},
  abstract      = {In this paper, we'll discuss a simple approach to integrating
musical events, such as notes or chords, into a programming language. This
means treating music sequences as a first class citizen. It will be
possible to save those sequences into variables or play them right away,
pass them into functions or apply operators on them (like transposing or
repeating the sequence). Furthermore, instead of just allowing static
sequences to be generated, we'll integrate a music keyboard system
that easily allows the user to bind keys (or other kinds of events) to
expressions. Finally, it is important to provide the user with multiple
and extensible ways of outputing their music, such as synthesizing it into
a file or directly into the speakers, or writing a MIDI or music sheet
file. We'll structure this paper first with an analysis of the problem
and its particular requirements. Then we will discuss the solution we
developed to meet those requirements. Finally we'll analyze the result
and discuss possible alternative routes we could've taken. © 2020 Schloss
Dagstuhl- Leibniz-Zentrum fur Informatik GmbH, Dagstuhl Publishing. All
rights reserved.},
  document_type = {Conference Paper},
  source        = {Scopus},
}

% SLATE 2020 paper (OASIcs volume 83, article A4).
% The acronym in the title is brace-protected so sentence-casing styles
% do not lowercase it; venue moved from journal to booktitle/series.
@inproceedings{Oliveira2020,
  author        = {Oliveira, M. and Silva, P.M. and Moura, Pedro and Almeida, J.J. and Henriques, P.R.},
  title         = {{BhTSL}, behavior trees specification and processing},
  booktitle     = {9th Symposium on Languages, Applications and Technologies ({SLATE} 2020)},
  series        = {OpenAccess Series in Informatics},
  publisher     = {Schloss Dagstuhl -- Leibniz-Zentrum für Informatik},
  year          = {2020},
  volume        = {83},
  doi           = {10.4230/OASIcs.SLATE.2020.4},
  art_number    = {A4},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85091707856&doi=10.4230%2fOASIcs.SLATE.2020.4&partnerID=40&md5=3b2daa7d548eeed77224386d6790adc7},
  abstract      = {In the context of game development, there is always the
need for describing behaviors for various entities, whether NPCs or
even the world itself. That need requires a formalism to describe
properly such behaviors. As the gaming industry has been growing,
many approaches were proposed. First, finite state machines were used
and evolved to hierarchical state machines. As that formalism was not
enough, a more powerful concept appeared. Instead of using states for
describing behaviors, people started to use tasks. This concept was
incorporated in behavior trees. This paper focuses in the specification
and processing of Behavior Trees. A DSL designed for that purpose will
be introduced. It will also be discussed a generator that produces LATEX
diagrams to document the trees, and a Python module to implement the
behavior described. Additionally, a simulator will be presented. These
achievements will be illustrated using a concrete game as a case study. ©
2020 Schloss Dagstuhl- Leibniz-Zentrum fur Informatik GmbH, Dagstuhl
Publishing. All rights reserved.},
  document_type = {Conference Paper},
  source        = {Scopus},
}

% SLATE 2020 paper (OASIcs volume 83, article A10).
% NOTE(review): the citation key contains a non-ASCII character, which
% classic BibTeX toolchains may reject; it is kept unchanged so existing
% citations keep resolving.
@inproceedings{Simões2020,
  author        = {Simões, Alberto and Sacanene, B. and Iriarte, Alvaro and Almeida, J.J. and Macedo, J.},
  title         = {Towards a morphological analyzer for the {Umbundu} language},
  booktitle     = {9th Symposium on Languages, Applications and Technologies ({SLATE} 2020)},
  series        = {OpenAccess Series in Informatics},
  publisher     = {Schloss Dagstuhl -- Leibniz-Zentrum für Informatik},
  year          = {2020},
  volume        = {83},
  doi           = {10.4230/OASIcs.SLATE.2020.10},
  art_number    = {A10},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85091700212&doi=10.4230%2fOASIcs.SLATE.2020.10&partnerID=40&md5=26f3c0eacb3fc1ea35f005c08377b083},
  abstract      = {In this document we present the first developments on an Umbundu
dictionary for a jSpell, a morphological analyzer. Initially some comments
are performed regarding the Umbundu language morphology, followed by the
discussion on jSpell dictionaries structure and its environment. Last, we
describe the Umbundu dictionary bootstrap process and perform some final
experiments on its coverage. © 2020 Schloss Dagstuhl- Leibniz-Zentrum
fur Informatik GmbH, Dagstuhl Publishing. All rights reserved.},
  document_type = {Conference Paper},
  source        = {Scopus},
}


% Workshop paper published in CEUR Workshop Proceedings, volume 2655.
% NOTE(review): booktitle currently holds the series name carried over from
% the Scopus export (it was in a journal field, which inproceedings styles
% ignore); replace it with the actual workshop proceedings title and move
% the series name to a series field once confirmed.
@inproceedings{Marcondes2020,
  author        = {Marcondes, F.S. and Almeida, J.J. and Novais, P.},
  title         = {Structural onomatology for username generation: A partial account},
  booktitle     = {CEUR Workshop Proceedings},
  year          = {2020},
  volume        = {2655},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85090898082&partnerID=40&md5=3bee224fddd1133fbeb306d5c88737fa},
  abstract      = {The username hints for most of the on-line social networks are
mostly unpleasant for human beings since they are mostly a simple name
variation followed by numbers. This paper shows that it is possible to
generate human likable usernames through heuristics guided by structural
onomastics. The objective then is to conceive heuristics as such and
check its availability in Twitter in order to verify if is it possible
to generate a sufficiently big and available username data-set that is
able to justify the transitions from unpleasant to a pleasant username
suggestion. This paper finds that it is possible to generate 8281 handles
on average through the proposed heuristics and their permutations,
therefore, the number of various possibilities is comfortable. This is
a partial account since not all possibilities were explored and some
improvements are required, but suits for a proof of concept and to
indicate paths. © 2020 CEUR-WS. All rights reserved.},
  document_type = {Conference Paper},
  source        = {Scopus},
}

% Conference paper published in the Springer AISC series (volume 1003).
% Mis-encoded characters from the export were repaired in the abstract.
% NOTE(review): booktitle derived from the DOI book ISBN (978-3-030-23887-2);
% confirm against the publisher page.
@inproceedings{Marcondes202028,
  author        = {Marcondes, F.S. and Almeida, J.J. and Novais, P.},
  title         = {A short survey on chatbot technology: Failure in raising the state of the art},
  booktitle     = {Distributed Computing and Artificial Intelligence, 16th International Conference},
  series        = {Advances in Intelligent Systems and Computing},
  publisher     = {Springer},
  year          = {2020},
  volume        = {1003},
  pages         = {28--36},
  doi           = {10.1007/978-3-030-23887-2_4},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85068602421&doi=10.1007%2f978-3-030-23887-2_4&partnerID=40&md5=cbf6fb00a51eb082aa7e1097f926fece},
  abstract      = {This short survey aimed initially to explore the existing
state of the art for the application of chatbot on fighting (and not on
spreading) of fake-news. It was then realized that there is not common to
use chatbots with this "virtuous" purpose. Therefore, after two surveys
and a meta-analysis, the topic had to be withdrawn since there were no
survey results to discuss besides the absence of results. The survey
result raised then a need to realize how chatbots are being currently
used, designed and their primary sources. The result was once again
confusing since, on the sample: (1) no significant concentration of usage
could be found; (2) no widely adopted design strategies were identified,
and (3) no significant crosscutting references to be considered as primary
sources. Certainly, this can be due to a biased sample but may also be a
symptom of a methodological issue on the chatbot researches. If the second
possibility is proved to be right it means that chatbot research is still
on a pre-paradigm stage according to Kuhn's conception. For this paper,
there were performed 4 surveys with a total sample of 50 papers mostly
from the last 3 years. © Springer Nature Switzerland AG 2020.},
  document_type = {Conference Paper},
  source        = {Scopus},
}

% Conference paper (document_type says so) published in the Springer AISC
% series, volume 1160; typed as inproceedings to match the other AISC
% entries, with the series acronym removed from the volume number.
% NOTE(review): booktitle derived from the DOI book ISBN (978-3-030-45691-7);
% confirm against the publisher page.
@inproceedings{Marcondes2020170,
  author        = {Marcondes, F.S. and Almeida, J.J. and Durães, D. and Novais, P.},
  title         = {Fact-Check spreading behavior in {Twitter}: A qualitative profile for false-claim news},
  booktitle     = {Trends and Innovations in Information Systems and Technologies ({WorldCIST} 2020), Volume 2},
  series        = {Advances in Intelligent Systems and Computing},
  publisher     = {Springer},
  year          = {2020},
  volume        = {1160},
  pages         = {170--180},
  doi           = {10.1007/978-3-030-45691-7_16},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85086245198&doi=10.1007%2f978-3-030-45691-7_16&partnerID=40&md5=6547f11464462d6bfdb1505e6142b733},
  abstract      = {Fact-check spread is usually performed by a plain tweet with
just the link. Since it is not proper human behavior, it may cause
uncanny, hinder the reader's attention and harm the counter-propaganda
influence. This paper presents a profile of fact-check link spread in
Twitter (suiting for TRL-1) and, as an additional outcome, proposes
a preliminary behavior design based on it (suiting for TRL-2). The
underlying hypothesis is by simulating human-like behavior, a bot gets
more attention and exerts more influence on its followers. © The Editor(s)
(if applicable) and The Author(s), under exclusive license to Springer
Nature Switzerland AG 2020.},
  document_type = {Conference Paper},
  source        = {Scopus},
}

% Conference paper (document_type says so) published in the Springer AISC
% series, volume 1159; typed as inproceedings to match the other AISC
% entries. Author initials aligned with the "Almeida, J.J." form used by
% the neighbouring entries so the same person is not indexed twice.
% NOTE(review): booktitle derived from the DOI book ISBN (978-3-030-45688-7);
% confirm against the publisher page.
@inproceedings{Martins2020134,
  author        = {Martins, R. and Almeida, J.J. and Henriques, P. and Novais, P.},
  title         = {Predicting an Election's Outcome Using Sentiment Analysis},
  booktitle     = {Trends and Innovations in Information Systems and Technologies ({WorldCIST} 2020), Volume 1},
  series        = {Advances in Intelligent Systems and Computing},
  publisher     = {Springer},
  year          = {2020},
  volume        = {1159},
  pages         = {134--143},
  doi           = {10.1007/978-3-030-45688-7_14},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85085513930&doi=10.1007%2f978-3-030-45688-7_14&partnerID=40&md5=d559e334a2140bea6ea02051264b73c4},
  abstract      = {Political debate - in its essence - carries a robust
emotional charge, and social media have become a vast arena for voters
to disseminate and discuss the ideas proposed by candidates. The
Brazilian presidential elections of 2018 were marked by a high level
of polarization, making the discussion of the candidates' ideas an
ideological battlefield, full of accusations and verbal aggression,
creating an excellent source for sentiment analysis. In this paper,
we analyze the emotions of the tweets posted about the presidential
candidates of Brazil on Twitter, so that it was possible to identify the
emotional profile of the adherents of each of the leading candidates,
and thus to discern which emotions had the strongest effects upon the
election results. Also, we created a model using sentiment analysis and
machine learning, which predicted with a correlation of 0.90 the final
result of the election. © 2020, The Editor(s) (if applicable) and The
Author(s), under exclusive license to Springer Nature Switzerland AG.},
  document_type = {Conference Paper},
  source        = {Scopus},
}

% BRACIS 2018 paper (a 2018 publication, although it sits in the 2020 section).
% The proceedings title was stored in a journal field, which inproceedings
% styles ignore; it is now in the required booktitle field.
@inproceedings{Martins201861,
  author        = {Martins, R. and Gomes, M. and Almeida, J.J. and Novais, P. and Henriques, P.},
  title         = {Hate speech classification in social media using emotional analysis},
  booktitle     = {Proceedings - 2018 Brazilian Conference on Intelligent Systems, BRACIS 2018},
  publisher     = {IEEE},
  year          = {2018},
  pages         = {61--66},
  doi           = {10.1109/BRACIS.2018.00019},
  art_number    = {8575590},
  url           = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85060849408&doi=10.1109%2fBRACIS.2018.00019&partnerID=40&md5=10284a22b511c161a903debd79e5619a},
  abstract      = {In this paper, we examine methods to classify hate speech
in social media. We aim to establish lexical baselines for this task
by applying classification methods using a dataset annotated for this
purpose. As features, our system uses Natural Language Processing (NLP)
techniques in order to expand the original dataset with emotional
information and provide it for machine learning classification. We
obtain results of 80.56\% accuracy in hate speech identification, which
represents an increase of almost 100\% from the original analysis used
as a reference. © 2018 IEEE.},
  document_type = {Conference Paper},
  source        = {Scopus},
}
