@inproceedings{slate2015spline,
  author    = {Nuno Vieira and Alberto Simões and Nuno Carvalho},
  title     = {{SplineAPI}: {A REST API} for {NLP} services},
  booktitle = {{IV} Symposium on Languages, Applications and Technologies},
  editor    = {José-Luís Sierra-Rodríguez and José Paulo Leal and Alberto Simões},
  year      = {2015},
  pages     = {101--110},
  isbn      = {978-84-606-8762-7},
}
@incollection{sardinha2014,
  author    = {José João Almeida and Sílvia Araújo and Nuno Carvalho and Idalete Dias and Ana Oliveira and André Santos and Alberto Simões},
  title     = {The {Per-Fide} Corpus: A New Resource for Corpus-Based Terminology, Contrastive Linguistics and Translation Studies},
  booktitle = {Working with Portuguese Corpora},
  editor    = {Tony Berber Sardinha and Telma de Lurdes São Bento Ferreira},
  publisher = {Bloomsbury Publishing},
  year      = {2014},
  month     = apr,
  pages     = {177--200},
  isbn      = {978-1441190505},
}
@inproceedings{wcist2012-dmoss,
  author    = {Nuno Ramos Carvalho and Alberto Simões and José João Almeida},
  title     = {Open Source Software Documentation Mining for Quality Assessment},
  booktitle = {Advances in Information Systems and Technologies},
  editor    = {Rocha, Álvaro and Correia, Ana Maria and Wilson, Tom and Stroetmann, Karl A.},
  series    = {Advances in Intelligent Systems and Computing},
  volume    = {206},
  publisher = {Springer Berlin Heidelberg},
  year      = {2013},
  pages     = {785--794},
  isbn      = {978-3-642-36980-3},
  idx       = {SCOPUS},
  abstract  = {Besides source code, the fundamental source of information about Open Source Software lies in documentation, and other non source code files, like \emph{README}, \emph{INSTALL}, or \emph{HowTo} files, commonly available in the software ecosystem. These documents, written in natural language, provide valuable information during the software development stage, but also in future maintenance and evolution tasks. DMOSS is a toolkit designed to systematically assess the quality of non source code text found in software packages. The toolkit handles a package as an attribute tree, and performs several tree traverse algorithms through a set of plugins, specialized in retrieving specific metrics from text, gathering information about the software. These metrics are later used to infer knowledge about the software, and composed together to build reports that assess the quality of specific features of the software. This paper discusses the motivations for this work, continues with a description of the toolkit implementation and design goals. Follows an example of its usage to process a software package, and the produced report. Finally some final remarks and trends for future work are presented.},
}
@inproceedings{carvalho2013ontology,
  author    = {Carvalho, Nuno Ramos},
  title     = {An ontology toolkit for problem domain concept location in program comprehension},
  booktitle = {Proceedings of the 2013 International Conference on Software Engineering},
  publisher = {IEEE Press},
  year      = {2013},
  pages     = {1415--1418},
}
@inproceedings{almeida2013passarola,
  author    = {Almeida, José João and Araújo, Isabel and Brito, Irene and Carvalho, Nuno and Machado, Gaspar J and Pereira, Rui and Smirnov, Georgi},
  title     = {{PASSAROLA}: High-order exercise generation system},
  booktitle = {2013 8th Iberian Conference on Information Systems and Technologies ({CISTI})},
  publisher = {IEEE},
  year      = {2013},
  pages     = {1--5},
}
@inproceedings{almeida2013math,
  author    = {Almeida, José João and Araújo, Isabel and Brito, Irene and Carvalho, Nuno and Machado, Gaspar J and Pereira, Rui and Smirnov, Georgi},
  title     = {Math exercise generation and smart assessment},
  booktitle = {2013 8th Iberian Conference on Information Systems and Technologies ({CISTI})},
  publisher = {IEEE},
  year      = {2013},
  pages     = {1--6},
}
@incollection{martins2013framework,
  author    = {Martins, Pedro and Carvalho, Nuno and Fernandes, João Paulo and Almeida, José João and Saraiva, João},
  title     = {A framework for modular and customizable software analysis},
  booktitle = {Computational Science and Its Applications -- {ICCSA} 2013},
  publisher = {Springer},
  year      = {2013},
  pages     = {443--458},
}
@inproceedings{tema2013-ptd,
  author    = {Alberto Simões and José João Almeida and Nuno Ramos Carvalho},
  title     = {Defining a Probabilistic Translation Dictionaries Algebra},
  booktitle = {XVI Portuguese Conference on Artificial Intelligence -- {EPIA}},
  editor    = {Luís Correia and Luís Paulo Reis and José Cascalho and Luís Gomes and Hélia Guerra and Pedro Cardoso},
  year      = {2013},
  month     = sep,
  pages     = {444--455},
  address   = {Angra do Heroísmo, Azores},
}
@inproceedings{elpub2012-wiki_score,
  author    = {José João Almeida and Nuno Ramos Carvalho and José Nuno Oliveira},
  title     = {{Wiki::Score} -- A Collaborative Environment For Music Transcription And Publishing},
  booktitle = {Social Shaping of Digital Publishing: Exploring the Interplay Between Culture and Technology - Proceedings of the 16th International Conference on Electronic Publishing},
  editor    = {Ana Alice Baptista and Peter Linde and Niklas Lavesson and Miguel Abrunhosa de Brito},
  publisher = {IOS Press},
  year      = {2012},
  volume    = {0},
  pages     = {82--93},
  isbn      = {978-1-61499-065-2},
  doi       = {10.3233/978-1-61499-065-9-82},
  note      = {Best Paper Award},
  abstract  = {Music sources are most commonly shared in music scores scanned or printed on paper sheets. These artifacts are rich in information, but since they are images it is hard to re-use and share their content in todays' digital world. There are modern languages that can be used to transcribe music sheets, this is still a time consuming task, because of the complexity involved in the process and the typical huge size of the original documents. Wiki::Score is a collaborative environment where several people work together to transcribe music sheets to a shared medium, using the notation. This eases the process of transcribing huge documents, and stores the document in a well known notation, that can be used later on to publish the whole content in several formats, such as a PDF document, images or audio files for example.},
}
@inproceedings{slate2012-probsynset,
  author    = {Nuno Ramos Carvalho and José João Almeida and Maria João Varanda Pereira and Pedro Rangel Henriques},
  title     = {Probabilistic {SynSet} Based Concept Location},
  booktitle = {SLATE'12 --- Symposium on Languages, Applications and Technologies},
  editor    = {Alberto Simões and Ricardo Queirós and Daniela da Cruz},
  publisher = {OASIcs -- Open Access Series in Informatics, Schloss Dagstuhl -- Leibniz-Zentrum für Informatik, Dagstuhl Publishing, Germany},
  year      = {2012},
  month     = jun,
  volume    = {21},
  pages     = {239--253},
  isbn      = {978-3-939879-40-8},
  doi       = {10.4230/OASIcs.SLATE.2012.I},
  abstract  = {Concept location is a common task in program comprehension techniques, essential in many approaches used for software care and software evolution. An important goal of this process is to discover a mapping between source code and human oriented concepts. Although programs are written in a strict and formal language, natural language terms and sentences like identifiers (variables or functions names), constant strings or comments, can still be found embedded in programs. Using terminology concepts and natural language processing techniques these terms can be exploited to discover clues about which real world concepts source code is addressing. This work extends symbol tables build by compilers with ontology driven constructs, extends synonym sets defined by linguistics, with automatically created Probabilistic SynSets from software domain parallel corpora. And using a relational algebra, creates semantic bridges between program elements and human oriented concepts, to enhance concept location tasks.},
}
@inproceedings{slate2012-flapp,
  author    = {Alberto Simões and Nuno Carvalho and José João Almeida},
  title     = {Generating flex lexical analyzers for {Perl} {Parse::Yapp}},
  booktitle = {SLATE'12 --- Symposium on Languages, Applications and Technologies},
  editor    = {Alberto Simões and Ricardo Queirós and Daniela da Cruz},
  publisher = {OASIcs -- Open Access Series in Informatics, Schloss Dagstuhl -- Leibniz-Zentrum für Informatik, Dagstuhl Publishing, Germany},
  year      = {2012},
  month     = jun,
  volume    = {21},
  pages     = {239--253},
  isbn      = {978-3-939879-40-8},
  doi       = {10.4230/OASIcs.SLATE.2012.I},
  internal-note = {NOTE(review): pages, doi and volume are identical to slate2012-probsynset -- likely copy-paste; verify against the published proceedings},
  abstract  = {Perl is known for its versatile regular expressions. Nevertheless, using Perl regular expressions for creating fast lexical analyzer is not easy. As an alternative, the authors defend the automated generation of the lexical analyzer in a well known fast application (flex) based on a simple Perl definition in the syntactic analyzer. In this paper we extend the syntax used by Parse::Yapp, one of the most used parser generators for Perl, making the automatic generation of flex lexical scanners possible. We explain how this is performed and conclude with some benchmarks that show the relevance of the approach.},
}
@inproceedings{LREC12.967,
  author    = {André Santos and José João Almeida and Nuno Carvalho},
  title     = {Structural alignment of plain text books},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  editor    = {Nicoletta Calzolari and others},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2012},
  month     = may,
  date      = {23-25},
  address   = {Istanbul, Turkey},
  isbn      = {978-2-9517408-7-7},
  language  = {english},
  abstract  = {Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections -- sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs books synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative methods for splitting bitexts.},
}
@inproceedings{corta2011-pftl,
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida and Pedro Rangel Henriques and Maria João Varanda Pereira},
  title     = {{PFTL}: A Systematic Approach For Describing Filesystem Tree Processors},
  booktitle = {INForum'11 --- Simpósio de Informática (CoRTA2011 track)},
  editor    = {Raul Barbosa and Luis Caires},
  publisher = {Dep. de Eng. Informática da Universidade de Coimbra},
  year      = {2011},
  month     = sep,
  pages     = {222--233},
  isbn      = {978-989-96001-5-7},
  address   = {Coimbra, Portugal},
  language  = {EN},
  abstract  = {Today, most developers prefer to store information in databases. But plain filesystems were used for years, and are still used, to store information, commonly in files of heterogeneous formats that are organized in directory trees. This approach is a very flexible and natural way to create hierarchical organized structures of documents. We can devise a formal notation to describe a filesystem tree structure, similar to a grammar, assuming that filenames can be considered terminal symbols, and directory names non-terminal symbols. This specification would allow to derive correct language sentences (combination of terminal symbols) and to associate semantic actions, that can produce arbitrary side effects, to each valid sentence, just as we do in common parser generation tools. These specifications can be used to systematically process files in directory trees, and the final result depends on the semantic actions associated with each production rule. In this paper we revamped an old idea of using a domain specific language to implement these specifications similar to context free grammars. And introduce some examples of applications that can be built using this approach.},
}
@inproceedings{corta2011-oml,
  author    = {Nuno Carvalho and José João Almeida and Alberto Simões},
  title     = {Weaving {OML} in a General Purpose Programming Language},
  booktitle = {INForum'11 --- Simpósio de Informática (CoRTA2011 track)},
  editor    = {Raul Barbosa and Luis Caires},
  publisher = {Dep. de Eng. Informática da Universidade de Coimbra},
  year      = {2011},
  month     = sep,
  pages     = {184--197},
  isbn      = {978-989-96001-5-7},
  address   = {Coimbra, Portugal},
  language  = {EN},
  abstract  = {Most existing programming languages can be categorized as general purpose programming languages, meaning that they can be used to implement solutions for any given domain. They are not, in any way, optimized for a specific set of problems. In contrast, Domain Specific Languages (DSL) are used to solve specific problems in a well defined domain. DSL are optimized to a particular set of problems, but they lack support for a wide range of operations that are required when dealing with real world problems. So, in a perfect world, we would like to implement applications using a general purpose programming language, but use a set of different DSL to handle specific domains' tasks. In this paper we describe a DSL named Ontology Manipulation Language (OML), designed to describe operations over with ontologies. Programs can be written using only the OML syntax and be executed independently. OML syntax was designed to deal with ontologies and the language itself is optimized to perform these tasks, which means that other relatively simpler tasks can not be easily done. To overcome this challenge a mechanism was developed so that you can weave small snippets of OML code inside Perl programs, meaning we have the power of OML to manipulate ontologies and, at the same time, all the paraphernalia of modules that Perl offers to handle everything else.},
}
@inproceedings{xml2pm-xata2011,
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida},
  title     = {xml2pm: A Tool for Automatic Creation of Object Definitions Based on {XML} Instances},
  booktitle = {{XATA 2011} --- 9ª Conferência Nacional em XML, Aplicações e Tecnologias Aplicadas},
  editor    = {Alberto Simões},
  year      = {2011},
  month     = {1--2 June},
  pages     = {103--114},
  isbn      = {978-989-96863-1-1},
  address   = {Vila do Conde, Portugal},
  lang      = {EN},
  abstract  = {The eXtensible Mark-up Language (XML) is probably one of the most popular markup languages available today. It is very typical to find all kind of services or programs representing data in this format. This situation is even more common in web development environments or Service Oriented Architectures (SOA), where data flows from one service to another, being consumed and produced by an heterogeneous set of applications, which sole requirement is to understand XML. This workflow of data represented in XML implies some tasks that applications have to perform if they are required to consume or produce information: the task of parsing an XML document, giving specific semantics to the information parsed, and the task of producing an XML document. Our main goal is to create object definitions that can analyze an XML document and automatically create an object definition that can be used abstractly by the application. These objects are able to parse the XML document and gather all the data required to mimic all the information present in the document. This paper introduces xml2pm, a simple tool that can inspect the structure of an XML document and create an object definition (a Perl module) that stores the same information present in the original document, but as a runtime object. We also introduce a simple case of how this approach allows the creation of applications based on Web Services in an elegant and simple way.},
}
@inproceedings{oml-cisti2011,
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida},
  title     = {{OML:} A Scripting Approach for Manipulating Ontologies},
  booktitle = {CISTI'11 - 6ª Conferência Ibérica de Sistemas e Tecnologias de Informação},
  year      = {2011},
  month     = jun,
  pages     = {624--629},
  address   = {Chaves, Portugal},
  lang      = {EN},
  abstract  = {There are different definitions for ontologies. Different knowledge areas tend to define ontologies in a different way. For computer science, an ontology can be used to describe, in a well defined and structured way, knowledge about a specific domain. These artifacts store rich information that can be reasoned about, this information can also be target of many structured processing functions. There is a diversity of programs that can be implemented to take advantage of these features and produce applications in every area of knowledge. The Ontology Manipulation Language (OML) is a Domain Specific Language (DSL) designed to describe and execute operations that reason about ontologies. These reasoning operations can be used to manipulate and maintain the current information in the ontology, infer new knowledge or concepts, or even produce any kind of side effect. OML is a simple and descriptive language, yet it is powerful enough to implement complex operations or reasoning engines in a clear and efficient way. To actually run programs written in OML a stand alone compiler is available, as well as a mechanism for embedding OML programs in a generic programming language. This allows the quick development of applications that make use of ontologies, by describing ontology related operations in wove OML snippets throughout the code. This mechanism has proven to be a very effective and clear approach for taking advantage of adopting ontologies to represent information, while maintaining the implicit advantages of using a general-goal programming language.},
}
@article{perlcert2010,
  author    = {Alberto Simões and Nuno Carvalho and José João Almeida},
  title     = {Testing as a Certification Approach},
  journal   = {Electronic Communications of the EASST},
  volume    = {33},
  year      = {2010},
  editor    = {Luis Barbosa and Antonio Cerone and Siraj Shaikh},
  note      = {Foundations and Techniques for Open Source Software Certification},
  issn      = {1863-2122},
  language  = {EN},
  abstract  = {For years, one of the main reasons to buy commercial software instead of adopting open-source applications was the, supposed, guarantee of quality. Unfortunately that was rarely true and, fortunately, open-source projects soon adopted some good practices in their code development that lead to better tested software and therefore higher quality products. In this article we provide a guided tour of some of the best practices that have been implemented in the Perl community in the recent years, as the pathway to a better community-oriented repository of modules, with automatic distributed testing in different platforms and architectures, and with automatic quality measures calculation.},
}
@inproceedings{navegante08,
  author    = {Nuno Carvalho and José João Almeida and Alberto Simões},
  title     = {{NAVEGANTE} --- An Intrusive Browsing Framework},
  booktitle = {{XATA 2008} --- 6ª Conferência Nacional em XML, Aplicações e Tecnologias Aplicadas},
  editor    = {José Carlos Ramalho and João Correia Lopes and Salvador Abreu},
  year      = {2008},
  month     = feb,
  pages     = {52--63},
  isbn      = {978-972-99166-5-6},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/navegante08.pdf},
  abstract  = {NAVEGANTE is a generic framework to build superior order proxies for intrusive browsing. This framework provides the means for developing tools that behave as proxies, but perform some processing task on the content that is being browsed. Parallel to this content processing, applications can also run other user-defined functions with different purposes and interfaces, but we'll explain those later. Currently, NAVEGANTE only builds applications that run as CGIs, but this is intended to change in a near future. Applications are built writing programs in NAVEGANTE's Domain Specific Language (DSL). NAVEGANTE is a work in progress. This article aims to describe the current state of development. What applications can be built and how. Also, we identify some implementation problems, and briefly discuss some future improvements. Finally, we try to illustrate most of the concepts described using a couple of case studies.},
}