% MAPA project paper (de-identification datasets and toolkit), published in the
% proceedings of the Legal/Ethical Issues workshop held within the 13th Language
% Resources and Evaluation Conference (2022).
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city -- kept as exported; confirm against the target style.
@InProceedings{arranz-EtAl:2022:LEGAL,
  author    = {Arranz, Victoria and Choukri, Khalid and Cuadros, Montse and Garc{\'\i}a Pablos, Aitor and Gianola, Lucie and Grouin, Cyril and Herranz, Manuel and Paroubek, Patrick and Zweigenbaum, Pierre},
  title     = {{MAPA} Project: Ready-to-Go Open-Source Datasets and Deep Learning Technology to Remove Identifying Information from Text Documents},
  booktitle = {Proceedings of the Workshop on Ethical and Legal Issues in Human Language Technologies and Multilingual De-Identification of Sensitive Data In Language Resources within the 13th Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {64--72},
  abstract  = {This paper presents the outcomes of the MAPA project, a set of annotated corpora for 24 languages of the European Union and an open-source customisable toolkit able to detect and substitute sensitive information in text documents from any domain, using state-of-the art, deep learning-based named entity recognition techniques. In the context of the project, the toolkit has been developed and tested on administrative, legal and medical documents, obtaining state-of-the-art results. As a result of the project, 24 dataset packages have been released and the de-identification toolkit is available as open source.},
  url       = {https://aclanthology.org/2022.legal-1.12},
}

