@inproceedings{slate2015spline,
  author    = {Nuno Vieira and Alberto Simões and Nuno Carvalho},
  title     = {{SplineAPI}: {A REST API} for {NLP} services},
  booktitle = {{IV} Symposium on Languages, Applications and Technologies},
  editor    = {José-Luís Sierra-Rodríguez and José Paulo Leal and Alberto Simões},
  year      = {2015},
  pages     = {101--110},
  isbn      = {978-84-606-8762-7},
}
@incollection{sardinha2014,
  author    = {José João Almeida and Sílvia Araújo and Nuno Carvalho and Idalete Dias and Ana Oliveira and André Santos and Alberto Simões},
  title     = {The {Per-Fide} Corpus: A New Resource for Corpus-Based Terminology, Contrastive Linguistics and Translation Studies},
  booktitle = {Working with Portuguese Corpora},
  editor    = {Tony Berber Sardinha and Telma de Lurdes São Bento Ferreira},
  publisher = {Bloomsbury Publishing},
  year      = {2014},
  month     = apr,
  pages     = {177--200},
  isbn      = {978-1441190505},
}
@inproceedings{wcist2012-dmoss,
  author    = {Nuno Ramos Carvalho and Alberto Simões and José João Almeida},
  title     = {Open Source Software Documentation Mining for Quality Assessment},
  booktitle = {Advances in Information Systems and Technologies},
  editor    = {Rocha, Álvaro and Correia, Ana Maria and Wilson, Tom and Stroetmann, Karl A.},
  series    = {Advances in Intelligent Systems and Computing},
  volume    = {206},
  publisher = {Springer Berlin Heidelberg},
  year      = {2013},
  pages     = {785--794},
  isbn      = {978-3-642-36980-3},
  idx       = {SCOPUS},
  abstract  = {Besides source code, the fundamental source of information about Open Source Software lies in documentation, and other non source code files, like \emph{README}, \emph{INSTALL}, or \emph{HowTo} files, commonly available in the software ecosystem. These documents, written in natural language, provide valuable information during the software development stage, but also in future maintenance and evolution tasks. DMOSS is a toolkit designed to systematically assess the quality of non source code text found in software packages. The toolkit handles a package as an attribute tree, and performs several tree traverse algorithms through a set of plugins, specialized in retrieving specific metrics from text, gathering information about the software. These metrics are later used to infer knowledge about the software, and composed together to build reports that assess the quality of specific features of the software. This paper discusses the motivations for this work, continues with a description of the toolkit implementation and design goals. Follows an example of its usage to process a software package, and the produced report. Finally some final remarks and trends for future work are presented.},
}
@inproceedings{carvalho2013ontology,
  author    = {Carvalho, Nuno Ramos},
  title     = {An ontology toolkit for problem domain concept location in program comprehension},
  booktitle = {Proceedings of the 2013 International Conference on Software Engineering},
  publisher = {IEEE Press},
  year      = {2013},
  pages     = {1415--1418},
}
@inproceedings{almeida2013passarola,
  author    = {Almeida, José João and Araújo, Isabel and Brito, Irene and Carvalho, Nuno and Machado, Gaspar J and Pereira, Rui and Smirnov, Georgi},
  title     = {{PASSAROLA}: High-order exercise generation system},
  booktitle = {2013 8th Iberian Conference on Information Systems and Technologies ({CISTI})},
  publisher = {IEEE},
  year      = {2013},
  pages     = {1--5},
}
@inproceedings{almeida2013math,
  author    = {Almeida, José João and Araújo, Isabel and Brito, Irene and Carvalho, Nuno and Machado, Gaspar J and Pereira, Rui and Smirnov, Georgi},
  title     = {Math exercise generation and smart assessment},
  booktitle = {2013 8th Iberian Conference on Information Systems and Technologies ({CISTI})},
  publisher = {IEEE},
  year      = {2013},
  pages     = {1--6},
}
@incollection{martins2013framework,
  author    = {Martins, Pedro and Carvalho, Nuno and Fernandes, João Paulo and Almeida, José João and Saraiva, João},
  title     = {A framework for modular and customizable software analysis},
  booktitle = {Computational Science and Its Applications -- {ICCSA} 2013},
  publisher = {Springer},
  year      = {2013},
  pages     = {443--458},
}
@inproceedings{tema2013-ptd,
  author    = {Alberto Simões and José João Almeida and Nuno Ramos Carvalho},
  title     = {Defining a Probabilistic Translation Dictionaries Algebra},
  booktitle = {XVI Portuguese Conference on Artificial Intelligence -- {EPIA}},
  editor    = {Luís Correia and Luís Paulo Reis and José Cascalho and Luís Gomes and Hélia Guerra and Pedro Cardoso},
  year      = {2013},
  month     = sep,
  pages     = {444--455},
  address   = {Angra do Heroísmo, Azores},
}
@inproceedings{elpub2012-wiki_score,
  author    = {José João Almeida and Nuno Ramos Carvalho and José Nuno Oliveira},
  title     = {{Wiki::Score} -- A Collaborative Environment For Music Transcription And Publishing},
  booktitle = {Social Shaping of Digital Publishing: Exploring the Interplay Between Culture and Technology - Proceedings of the 16th International Conference on Electronic Publishing},
  editor    = {Ana Alice Baptista and Peter Linde and Niklas Lavesson and Miguel Abrunhosa de Brito},
  publisher = {IOS Press},
  year      = {2012},
  volume    = {0},
  pages     = {82--93},
  isbn      = {978-1-61499-065-2},
  doi       = {10.3233/978-1-61499-065-9-82},
  note      = {Best Paper Award},
  abstract  = {Music sources are most commonly shared in music scores scanned or printed on paper sheets. These artifacts are rich in information, but since they are images it is hard to re-use and share their content in todays' digital world. There are modern languages that can be used to transcribe music sheets, this is still a time consuming task, because of the complexity involved in the process and the typical huge size of the original documents. Wiki::Score is a collaborative environment where several people work together to transcribe music sheets to a shared medium, using the notation. This eases the process of transcribing huge documents, and stores the document in a well known notation, that can be used later on to publish the whole content in several formats, such as a PDF document, images or audio files for example.},
}
@inproceedings{slate2012-probsynset,
  author    = {Nuno Ramos Carvalho and José João Almeida and Maria João Varanda Pereira and Pedro Rangel Henriques},
  title     = {Probabilistic {SynSet} Based Concept Location},
  booktitle = {SLATE'12 --- Symposium on Languages, Applications and Technologies},
  editor    = {Alberto Simões and Ricardo Queirós and Daniela da Cruz},
  publisher = {OASIcs -- Open Access Series in Informatics, Schloss Dagstuhl -- Leibniz-Zentrum für Informatik, Dagstuhl Publishing, Germany},
  year      = {2012},
  month     = jun,
  volume    = {21},
  pages     = {239--253},
  isbn      = {978-3-939879-40-8},
  doi       = {10.4230/OASIcs.SLATE.2012.I},
  abstract  = {Concept location is a common task in program comprehension techniques, essential in many approaches used for software care and software evolution. An important goal of this process is to discover a mapping between source code and human oriented concepts. Although programs are written in a strict and formal language, natural language terms and sentences like identifiers (variables or functions names), constant strings or comments, can still be found embedded in programs. Using terminology concepts and natural language processing techniques these terms can be exploited to discover clues about which real world concepts source code is addressing. This work extends symbol tables build by compilers with ontology driven constructs, extends synonym sets defined by linguistics, with automatically created Probabilistic SynSets from software domain parallel corpora. And using a relational algebra, creates semantic bridges between program elements and human oriented concepts, to enhance concept location tasks.},
}
@inproceedings{slate2012-flapp,
  author    = {Alberto Simões and Nuno Carvalho and José João Almeida},
  title     = {Generating flex lexical analyzers for {Perl} {Parse::Yapp}},
  booktitle = {SLATE'12 --- Symposium on Languages, Applications and Technologies},
  editor    = {Alberto Simões and Ricardo Queirós and Daniela da Cruz},
  publisher = {OASIcs -- Open Access Series in Informatics, Schloss Dagstuhl -- Leibniz-Zentrum für Informatik, Dagstuhl Publishing, Germany},
  year      = {2012},
  month     = jun,
  volume    = {21},
  pages     = {239--253},
  isbn      = {978-3-939879-40-8},
  doi       = {10.4230/OASIcs.SLATE.2012.I},
  internal-note = {NOTE(review): pages, doi and volume are identical to slate2012-probsynset -- likely copy-paste; verify against the published proceedings},
  abstract  = {Perl is known for its versatile regular expressions. Nevertheless, using Perl regular expressions for creating fast lexical analyzer is not easy. As an alternative, the authors defend the automated generation of the lexical analyzer in a well known fast application (flex) based on a simple Perl definition in the syntactic analyzer. In this paper we extend the syntax used by Parse::Yapp, one of the most used parser generators for Perl, making the automatic generation of flex lexical scanners possible. We explain how this is performed and conclude with some benchmarks that show the relevance of the approach.},
}
@inproceedings{LREC12.967,
  author    = {André Santos and José João Almeida and Nuno Carvalho},
  title     = {Structural alignment of plain text books},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  editor    = {Nicoletta Calzolari and others},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2012},
  month     = may,
  date      = {23-25},
  address   = {Istanbul, Turkey},
  isbn      = {978-2-9517408-7-7},
  language  = {english},
  abstract  = {Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections -- sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs books synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative methods for splitting bitexts.},
}
@inproceedings{corta2011-pftl,
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida and Pedro Rangel Henriques and Maria João Varanda Pereira},
  title     = {{PFTL}: A Systematic Approach For Describing Filesystem Tree Processors},
  booktitle = {INForum'11 --- Simpósio de Informática (CoRTA2011 track)},
  editor    = {Raul Barbosa and Luis Caires},
  publisher = {Dep. de Eng. Informática da Universidade de Coimbra},
  year      = {2011},
  month     = sep,
  pages     = {222--233},
  isbn      = {978-989-96001-5-7},
  address   = {Coimbra, Portugal},
  language  = {EN},
  abstract  = {Today, most developers prefer to store information in databases. But plain filesystems were used for years, and are still used, to store information, commonly in files of heterogeneous formats that are organized in directory trees. This approach is a very flexible and natural way to create hierarchical organized structures of documents. We can devise a formal notation to describe a filesystem tree structure, similar to a grammar, assuming that filenames can be considered terminal symbols, and directory names non-terminal symbols. This specification would allow to derive correct language sentences (combination of terminal symbols) and to associate semantic actions, that can produce arbitrary side effects, to each valid sentence, just as we do in common parser generation tools. These specifications can be used to systematically process files in directory trees, and the final result depends on the semantic actions associated with each production rule. In this paper we revamped an old idea of using a domain specific language to implement these specifications similar to context free grammars. And introduce some examples of applications that can be built using this approach.},
}
@inproceedings{corta2011-oml,
  author    = {Nuno Carvalho and José João Almeida and Alberto Simões},
  title     = {Weaving {OML} in a General Purpose Programming Language},
  booktitle = {INForum'11 --- Simpósio de Informática (CoRTA2011 track)},
  editor    = {Raul Barbosa and Luis Caires},
  publisher = {Dep. de Eng. Informática da Universidade de Coimbra},
  year      = {2011},
  month     = sep,
  pages     = {184--197},
  isbn      = {978-989-96001-5-7},
  address   = {Coimbra, Portugal},
  language  = {EN},
  abstract  = {Most existing programming languages can be categorized as general purpose programming languages, meaning that they can be used to implement solutions for any given domain. They are not, in any way, optimized for a specific set of problems. In contrast, Domain Specific Languages (DSL) are used to solve specific problems in a well defined domain. DSL are optimized to a particular set of problems, but they lack support for a wide range of operations that are required when dealing with real world problems. So, in a perfect world, we would like to implement applications using a general purpose programming language, but use a set of different DSL to handle specific domains' tasks. In this paper we describe a DSL named Ontology Manipulation Language (OML), designed to describe operations over with ontologies. Programs can be written using only the OML syntax and be executed independently. OML syntax was designed to deal with ontologies and the language itself is optimized to perform these tasks, which means that other relatively simpler tasks can not be easily done. To overcome this challenge a mechanism was developed so that you can weave small snippets of OML code inside Perl programs, meaning we have the power of OML to manipulate ontologies and, at the same time, all the paraphernalia of modules that Perl offers to handle everything else.},
}
@inproceedings{xml2pm-xata2011,
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida},
  title     = {xml2pm: A Tool for Automatic Creation of Object Definitions Based on {XML} Instances},
  booktitle = {{XATA 2011} --- 9ª Conferência Nacional em XML, Aplicações e Tecnologias Aplicadas},
  editor    = {Alberto Simões},
  year      = {2011},
  month     = {1--2 June},
  pages     = {103--114},
  isbn      = {978-989-96863-1-1},
  address   = {Vila do Conde, Portugal},
  lang      = {EN},
  abstract  = {The eXtensible Mark-up Language (XML) is probably one of the most popular markup languages available today. It is very typical to find all kind of services or programs representing data in this format. This situation is even more common in web development environments or Service Oriented Architectures (SOA), where data flows from one service to another, being consumed and produced by an heterogeneous set of applications, which sole requirement is to understand XML. This workflow of data represented in XML implies some tasks that applications have to perform if they are required to consume or produce information: the task of parsing an XML document, giving specific semantics to the information parsed, and the task of producing an XML document. Our main goal is to create object definitions that can analyze an XML document and automatically create an object definition that can be used abstractly by the application. These objects are able to parse the XML document and gather all the data required to mimic all the information present in the document. This paper introduces xml2pm, a simple tool that can inspect the structure of an XML document and create an object definition (a Perl module) that stores the same information present in the original document, but as a runtime object. We also introduce a simple case of how this approach allows the creation of applications based on Web Services in an elegant and simple way.},
}
@inproceedings{oml-cisti2011,
  author    = {Nuno Carvalho and Alberto Simões and José João Almeida},
  title     = {{OML:} A Scripting Approach for Manipulating Ontologies},
  booktitle = {CISTI'11 - 6ª Conferência Ibérica de Sistemas e Tecnologias de Informação},
  year      = {2011},
  month     = jun,
  pages     = {624--629},
  address   = {Chaves, Portugal},
  lang      = {EN},
  abstract  = {There are different definitions for ontologies. Different knowledge areas tend to define ontologies in a different way. For computer science, an ontology can be used to describe, in a well defined and structured way, knowledge about a specific domain. These artifacts store rich information that can be reasoned about, this information can also be target of many structured processing functions. There is a diversity of programs that can be implemented to take advantage of these features and produce applications in every area of knowledge. The Ontology Manipulation Language (OML) is a Domain Specific Language (DSL) designed to describe and execute operations that reason about ontologies. These reasoning operations can be used to manipulate and maintain the current information in the ontology, infer new knowledge or concepts, or even produce any kind of side effect. OML is a simple and descriptive language, yet it is powerful enough to implement complex operations or reasoning engines in a clear and efficient way. To actually run programs written in OML a stand alone compiler is available, as well as a mechanism for embedding OML programs in a generic programming language. This allows the quick development of applications that make use of ontologies, by describing ontology related operations in wove OML snippets throughout the code. This mechanism has proven to be a very effective and clear approach for taking advantage of adopting ontologies to represent information, while maintaining the implicit advantages of using a general-goal programming language.},
}
@article{perlcert2010,
  author    = {Alberto Simões and Nuno Carvalho and José João Almeida},
  title     = {Testing as a Certification Approach},
  journal   = {Electronic Communications of the EASST},
  volume    = {33},
  year      = {2010},
  editor    = {Luis Barbosa and Antonio Cerone and Siraj Shaikh},
  note      = {Foundations and Techniques for Open Source Software Certification},
  issn      = {1863-2122},
  language  = {EN},
  abstract  = {For years, one of the main reasons to buy commercial software instead of adopting open-source applications was the, supposed, guarantee of quality. Unfortunately that was rarely true and, fortunately, open-source projects soon adopted some good practices in their code development that lead to better tested software and therefore higher quality products. In this article we provide a guided tour of some of the best practices that have been implemented in the Perl community in the recent years, as the pathway to a better community-oriented repository of modules, with automatic distributed testing in different platforms and architectures, and with automatic quality measures calculation.},
}
@inproceedings{navegante08,
  author    = {Nuno Carvalho and José João Almeida and Alberto Simões},
  title     = {{NAVEGANTE} --- An Intrusive Browsing Framework},
  booktitle = {{XATA 2008} --- 6ª Conferência Nacional em XML, Aplicações e Tecnologias Aplicadas},
  editor    = {José Carlos Ramalho and João Correia Lopes and Salvador Abreu},
  year      = {2008},
  month     = feb,
  pages     = {52--63},
  isbn      = {978-972-99166-5-6},
  url       = {http://alfarrabio.di.uminho.pt/~albie/publications/navegante08.pdf},
  abstract  = {NAVEGANTE is a generic framework to build superior order proxies for intrusive browsing. This framework provides the means for developing tools that behave as proxies, but perform some processing task on the content that is being browsed. Parallel to this content processing, applications can also run other user-defined functions with different purposes and interfaces, but we'll explain those later. Currently, NAVEGANTE only builds applications that run as CGIs, but this is intended to change in a near future. Applications are built writing programs in NAVEGANTE's Domain Specific Language (DSL). NAVEGANTE is a work in progress. This article aims to describe the current state of development. What applications can be built and how. Also, we identify some implementation problems, and briefly discuss some future improvements. Finally, we try to illustrate most of the concepts described using a couple of case studies.},
}