Publications

Download a BibTeX file containing all these papers.
Publications (grouped by year): 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008, 2007, 2006, 2003.
2024
  • Yasaman RazeghiIshita DasguptaFangyu LiuVinay Venkatesh RamaseshSameer Singh.PlotTwist: Multimodal Models Don't Comprehend Simple Chart Details. Empirical Methods in Natural Language Processing (EMNLP). 2024 Conference
    [ BibTex ]
    @inproceedings{plottwist:emnlp24,
      author = {Yasaman Razeghi and Ishita Dasgupta and Fangyu Liu and Vinay Venkatesh Ramasesh and Sameer Singh},
      title = {{PlotTwist}: Multimodal Models Don't Comprehend Simple Chart Details},
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      year = {2024}
    }
  • Catarina BelémMarkelle KellyMark SteyversSameer SinghPadhraic Smyth.Perceptions of Linguistic Uncertainty by Language Models and Humans. Empirical Methods in Natural Language Processing (EMNLP). 2024 Conference
    [ BibTex ]
    @inproceedings{linguncertain:emnlp24,
      author = {Catarina Bel{\'e}m and Markelle Kelly and Mark Steyvers and Sameer Singh and Padhraic Smyth},
      title = {Perceptions of Linguistic Uncertainty by Language Models and Humans},
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      year = {2024}
    }
  • Anirudh AjithSameer SinghDanish Pruthi.Performance Trade-offs of a Family of Text Watermarks. Empirical Methods in Natural Language Processing (EMNLP). 2024 Conference
    [ BibTex ]
    @inproceedings{watermark:emnlp24,
      author = {Anirudh Ajith and Sameer Singh and Danish Pruthi},
      title = {Performance Trade-offs of a Family of Text Watermarks},
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      year = {2024}
    }
  • Yanai ElazarBhargavi ParanjapeHao PengSarah WiegreffeKhyathi ChanduVivek SrikumarSameer SinghNoah A. Smith.Measuring and Improving Attentiveness to Partial Inputs with Counterfactuals. Empirical Methods in Natural Language Processing (EMNLP). 2024 Conference
    [ BibTex ]
    @inproceedings{partial:emnlp24,
      author = {Yanai Elazar and Bhargavi Paranjape and Hao Peng and Sarah Wiegreffe and Khyathi Chandu and Vivek Srikumar and Sameer Singh and Noah A. Smith},
      title = {Measuring and Improving Attentiveness to Partial Inputs with Counterfactuals},
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      year = {2024}
    }
  • Kolby NottinghamBodhisattwa Prasad MajumderBhavana Dalvi MishraSameer SinghPeter ClarkRoy Fox.Skill Set Optimization: Reinforcing Language Model Behavior via Transferable Skills. International Conference on Machine Learning (ICML). 2024 Conference
    PDFArXiVOpenReview, BibTex ]
    @inproceedings{skillsetopt:icml24,
      author = {Kolby Nottingham and Bodhisattwa Prasad Majumder and Bhavana Dalvi Mishra and Sameer Singh and Peter Clark and Roy Fox},
      title = {Skill Set Optimization: Reinforcing Language Model Behavior via Transferable Skills},
      booktitle = {International Conference on Machine Learning (ICML)},
      year = {2024}
    }
  • Tamanna HossainSunipa DevSameer Singh.MisgenderMender: A Community-Informed Approach to Interventions for Misgendering. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2024 Conference
    PDFArXiV, BibTex ]
    @inproceedings{misgendermender:naacl24,
      author = {Tamanna Hossain and Sunipa Dev and Sameer Singh},
      title = {{MisgenderMender}: A Community-Informed Approach to Interventions for Misgendering},
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year = {2024}
    }
  • Preethi SeshadriSameer SinghYanai Elazar.The Bias Amplification Paradox in Text-to-Image Generation. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2024 Conference
    PDFArXiV, BibTex ]
    @inproceedings{biasampl:naacl24,
      author = {Preethi Seshadri and Sameer Singh and Yanai Elazar},
      title = {The Bias Amplification Paradox in Text-to-Image Generation},
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year = {2024}
    }
  • Raja Sekhar Reddy MekalaYasaman RazeghiSameer Singh.EchoPrompt: Instructing the Model to Rephrase Queries for Improved In-context Learning. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2024 Conference
    PDFArXiV, BibTex ]
    @inproceedings{echoprompt:naacl24,
      author = {Raja Sekhar Reddy Mekala and Yasaman Razeghi and Sameer Singh},
      title = {{EchoPrompt}: Instructing the Model to Rephrase Queries for Improved In-context Learning},
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year = {2024}
    }
  • Kolby NottinghamYasaman RazeghiKyungmin KimJB LanierPierre BaldiRoy FoxSameer Singh.Selective Perception: Learning Concise State Descriptions for Language Model Actors. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2024 Conference
    PDFArXiV, BibTex ]
    @inproceedings{selperception:naacl24,
      author = {Kolby Nottingham and Yasaman Razeghi and Kyungmin Kim and JB Lanier and Pierre Baldi and Roy Fox and Sameer Singh},
      title = {Selective Perception: Learning Concise State Descriptions for Language Model Actors},
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year = {2024}
    }
  • Catarina BelémPreethi SeshadriYasaman RazeghiSameer Singh.Are Models Biased on Text without Gender-related Language?. International Conference on Learning Representations (ICLR). 2024 Conference
    PDFArXiVOpenReviewProject PageVideo, BibTex ]
    @inproceedings{unstereoeval:iclr24,
      author = {Catarina Bel{\'e}m and Preethi Seshadri and Yasaman Razeghi and Sameer Singh},
      title = {Are Models Biased on Text without Gender-related Language?},
      booktitle = {International Conference on Learning Representations (ICLR)},
      year = {2024}
    }
  • Yanai ElazarAkshita BhagiaIan Helgi MagnussonAbhilasha RavichanderDustin SchwenkAlane SuhrEvan Pete WalshDirk GroeneveldLuca SoldainiSameer SinghHannaneh HajishirziNoah A. SmithJesse Dodge.What's In My Big Data?. International Conference on Learning Representations (ICLR). 2024 Conference
    PDFArXiVOpenReviewDemo, BibTex ]
    @inproceedings{wimbd:iclr24,
      author = {Yanai Elazar and Akshita Bhagia and Ian Helgi Magnusson and Abhilasha Ravichander and Dustin Schwenk and Alane Suhr and Evan Pete Walsh and Dirk Groeneveld and Luca Soldaini and Sameer Singh and Hannaneh Hajishirzi and Noah A. Smith and Jesse Dodge},
      title = {What's In My Big Data?},
      booktitle = {International Conference on Learning Representations (ICLR)},
      year = {2024}
    }
2023
  • Satyapriya KrishnaJiaqi MaDylan SlackAsma GhandehariounSameer SinghHimabindu Lakkaraju.Post Hoc Explanations of Language Models Can Improve Language Models. Neural Information Processing Systems (NeurIPS). 2023 Conference
    ArXiVPDF, BibTex ]
    @inproceedings{posthoc:neurips23,
      author = {Satyapriya Krishna and Jiaqi Ma and Dylan Slack and Asma Ghandeharioun and Sameer Singh and Himabindu Lakkaraju},
      title = {Post Hoc Explanations of Language Models Can Improve Language Models},
      booktitle = {Neural Information Processing Systems (NeurIPS)},
      year = {2023}
    }
  • Shivanshu GuptaMatt GardnerSameer Singh.Coverage-based Example Selection for In-Context Learning. Empirical Methods in Natural Language Processing (EMNLP). 2023 Conference
    ArXiVPDF, BibTex ]
    @inproceedings{demselect:emnlp23,
      author = {Shivanshu Gupta and Matt Gardner and Sameer Singh},
      title = {Coverage-based Example Selection for {In-Context} Learning},
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      year = {2023}
    }
  • Dheeru DuaEmma StrubellSameer SinghPat Verga.To Adapt or to Annotate: Challenges and Interventions for Domain Adaptation in Open-Domain Question Answering. Association for Computational Linguistics (ACL). 2023 Conference
    ACL AnthologyArXiVPDF, Abstract, BibTex ]
    Recent advances in open-domain question answering (ODQA) have demonstrated impressive accuracy on general-purpose domains like Wikipedia. While some work has been investigating how well ODQA models perform when tested for out-of-domain (OOD) generalization, these studies have been conducted only under conservative shifts in data distribution and typically focus on a single component (i.e., retriever or reader) rather than an end-to-end system. This work proposes a more realistic end-to-end domain shift evaluation setting covering five diverse domains. We not only find that end-to-end models fail to generalize but that high retrieval scores often still yield poor answer prediction accuracy. To address these failures, we investigate several interventions, in the form of data augmentations, for improving model adaption and use our evaluation set to elucidate the relationship between the efficacy of an intervention scheme and the particular type of dataset shifts we consider. We propose a generalizability test that estimates the type of shift in a target dataset without training a model in the target domain and that the type of shift is predictive of which data augmentation schemes will be effective for domain adaption. Overall, we find that these interventions increase end-to-end performance by up to ~24 points.
    @inproceedings{adaptqa:acl23,
      author = {Dheeru Dua and Emma Strubell and Sameer Singh and Pat Verga},
      title = {To Adapt or to Annotate: Challenges and Interventions for Domain Adaptation in Open-Domain Question Answering},
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/2023.acl-long.807},
      pages = {14429--14446},
      year = {2023}
    }
  • Tamanna HossainSunipa DevSameer Singh.MISGENDERED: Limits of Large Language Models in Understanding Pronouns. Association for Computational Linguistics (ACL). 2023 Conference
    ACL AnthologyArXiVPDFVideoDemoCode, Abstract, BibTex ]
    Content Warning: This paper contains examples of misgendering and erasure that could be offensive and potentially triggering.Gender bias in language technologies has been widely studied, but research has mostly been restricted to a binary paradigm of gender. It is essential also to consider non-binary gender identities, as excluding them can cause further harm to an already marginalized group. In this paper, we comprehensively evaluate popular language models for their ability to correctly use English gender-neutral pronouns (e.g., singular they, them) and neo-pronouns (e.g., ze, xe, thon) that are used by individuals whose gender identity is not represented by binary pronouns. We introduce Misgendered, a framework for evaluating large language models’ ability to correctly use preferred pronouns, consisting of (i) instances declaring an individual’s pronoun, followed by a sentence with a missing pronoun, and (ii) an experimental setup for evaluating masked and auto-regressive language models using a unified method. When prompted out-of-the-box, language models perform poorly at correctly predicting neo-pronouns (averaging 7.6% accuracy) and gender-neutral pronouns (averaging 31.0% accuracy). This inability to generalize results from a lack of representation of non-binary pronouns in training data and memorized associations. Few-shot adaptation with explicit examples in the prompt improves the performance but plateaus at only 45.4% for neo-pronouns. We release the full dataset, code, and demo at https://tamannahossainkay.github.io/misgendered/.
    @inproceedings{misgendered:acl23,
      author = {Tamanna Hossain and Sunipa Dev and Sameer Singh},
      title = {{MISGENDERED}: Limits of Large Language Models in Understanding Pronouns},
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/2023.acl-long.293},
      pages = {5352--5367},
      year = {2023}
    }
  • Kolby NottinghamPrithviraj AmmanabroluAlane SuhrYejin ChoiHannaneh HajishirziSameer SinghRoy Fox.Do Embodied Agents Dream of Pixelated Sheep: Embodied Decision Making using Language Guided World Modelling. International Conference on Machine Learning (ICML). 2023 Conference
    ArXiVProject PageCode, BibTex ]
    @inproceedings{deckard:icml23,
      author = {Kolby Nottingham and Prithviraj Ammanabrolu and Alane Suhr and Yejin Choi and Hannaneh Hajishirzi and Sameer Singh and Roy Fox},
      title = {Do Embodied Agents Dream of Pixelated Sheep: Embodied Decision Making using Language Guided World Modelling},
      booktitle = {International Conference on Machine Learning (ICML)},
      pages = {26311--26325},
      year = {2023}
    }
  • Zhouhang XieSameer SinghJulian McAuleyBodhisattwa P. Majumder.Towards Factual and Informative Review Generation for Explainable Recommendation. AAAI Conference on Artificial Intelligence (AAAI). 2023 Conference
    ArXiV, BibTex ]
    @inproceedings{recomm:aaai23,
      author = {Zhouhang Xie and Sameer Singh and Julian McAuley and Bodhisattwa P. Majumder},
      title = {Towards Factual and Informative Review Generation for Explainable Recommendation},
      booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
      year = {2023}
    }
  • Margarita GeletaJiacen XuManikanta LoyaJunlin WangSameer SinghZhou LiSergio Gago Masague.Maestro: A Gamified Platform for Teaching AI Robustness. AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI). 2023 Conference
    [ BibTex ]
    @inproceedings{maestro:eaai23,
      author = {Margarita Geleta and Jiacen Xu and Manikanta Loya and Junlin Wang and Sameer Singh and Zhou Li and Sergio Gago Masague},
      title = {Maestro: A Gamified Platform for Teaching {AI} Robustness},
      booktitle = {AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI)},
      pages = {15816--15824},
      year = {2023}
    }
  • Elena KochkinaTamanna HossainRobert L. Logan IVMiguel Arana-CataniaRob ProcterArkaitz ZubiagaSameer SinghYulan HeMaria Liakata.Evaluating the generalisability of neural rumour verification models. Information Processing and Management. 2023 Journal
    Journal pagePDF, Abstract, BibTex ]
    Research on automated social media rumour verification, the task of identifying the veracity of questionable information circulating on social media, has yielded neural models achieving high performance, with accuracy scores that often exceed 90%. However, none of these studies focus on the real-world generalisability of the proposed approaches, that is whether the models perform well on datasets other than those on which they were initially trained and tested. In this work we aim to fill this gap by assessing the generalisability of top performing neural rumour verification models covering a range of different architectures from the perspectives of both topic and temporal robustness. For a more complete evaluation of generalisability, we collect and release COVID-RV, a novel dataset of Twitter conversations revolving around COVID-19 rumours. Unlike other existing COVID-19 datasets, our COVID-RV contains conversations around rumours that follow the format of prominent rumour verification benchmarks, while being different from them in terms of topic and time scale, thus allowing better assessment of the temporal robustness of the models. We evaluate model performance on COVID-RV and three popular rumour verification datasets to understand limitations and advantages of different model architectures, training datasets and evaluation scenarios. We find a dramatic drop in performance when testing models on a different dataset from that used for training. Further, we evaluate the ability of models to generalise in a few-shot learning setup, as well as when word embeddings are updated with the vocabulary of a new, unseen rumour. Drawing upon our experiments we discuss challenges and make recommendations for future research directions in addressing this important problem.
    @article{rumors:ipm23,
      author = {Elena Kochkina and Tamanna Hossain and Robert L. Logan IV and Miguel Arana-Catania and Rob Procter and Arkaitz Zubiaga and Sameer Singh and Yulan He and Maria Liakata},
      title = {Evaluating the generalisability of neural rumour verification models},
      journal = {Information Processing and Management},
      doi = {10.1016/j.ipm.2022.103116},
      year = {2023}
    }
  • Dylan SlackSatyapriya KrishnaHimabindu LakkarajuSameer Singh.Explaining machine learning models with interactive natural language conversations using TalkToModel. Nature Machine Intelligence. 2023 Journal
    Nature pagePDFCodeArXiVDemo, Abstract, BibTex ]
    Practitioners increasingly use machine learning (ML) models, yet models have become more complex and harder to understand. To understand complex models, researchers have proposed techniques to explain model predictions. However, practitioners struggle to use explainability methods because they do not know which explanation to choose and how to interpret the explanation. Here we address the challenge of using explainability methods by proposing TalkToModel: an interactive dialogue system that explains ML models through natural language conversations. TalkToModel consists of three components: an adaptive dialogue engine that interprets natural language and generates meaningful responses; an execution component that constructs the explanations used in the conversation; and a conversational interface. In real-world evaluations, 73% of healthcare workers agreed they would use TalkToModel over existing systems for understanding a disease prediction model, and 85% of ML professionals agreed TalkToModel was easier to use, demonstrating that TalkToModel is highly effective for model explainability.
    @article{talktomodel:ni23,
      author = {Dylan Slack and Satyapriya Krishna and Himabindu Lakkaraju and Sameer Singh},
      title = {Explaining machine learning models with interactive natural language conversations using {TalkToModel}},
      journal = {Nature Machine Intelligence},
      doi = {10.1038/s42256-023-00692-8},
      year = {2023}
    }
2022
  • Dheeru DuaShivanshu GuptaSameer SinghMatt Gardner.Successive Prompting for Decomposing Complex Questions. Empirical Methods in Natural Language Processing (EMNLP). 2022 Conference
    ACL Anthology, BibTex ]
    @inproceedings{decompqa:emnlp22,
      author = {Dheeru Dua and Shivanshu Gupta and Sameer Singh and Matt Gardner},
      title = {Successive Prompting for Decomposing Complex Questions},
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      year = {2022}
    }
  • Zhaofeng WuRobert L. Logan IVPete WalshAkshita BhagiaDirk GroeneveldSameer SinghIz Beltagy.Continued Pretraining for Better Zero- and Few-Shot Promptability. Empirical Methods in Natural Language Processing (EMNLP). 2022 Conference
    ACL AnthologyPDF, BibTex ]
    @inproceedings{pretraining:emnlp22,
      author = {Zhaofeng Wu and Robert L. Logan IV and Pete Walsh and Akshita Bhagia and Dirk Groeneveld and Sameer Singh and Iz Beltagy},
      title = {Continued Pretraining for Better Zero- and Few-Shot Promptability},
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/2022.emnlp-main.300},
      pages = {4517--4531},
      year = {2022}
    }
  • Yasaman RazeghiRobert L. Logan IVMatt GardnerSameer Singh.Impact of Pretraining Term Frequencies on Few-Shot Numerical Reasoning. Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings). 2022 Conference
    ACL AnthologyPDF, BibTex ]
    @inproceedings{impact:femnlp22,
      author = {Yasaman Razeghi and Robert L. Logan IV and Matt Gardner and Sameer Singh},
      title = {Impact of Pretraining Term Frequencies on Few-Shot Numerical Reasoning},
      booktitle = {Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings)},
      year = {2022}
    }
  • Shivanshu GuptaSameer SinghMatt Gardner.Structurally Diverse Sampling for Sample-Efficient Training and Comprehensive Evaluation. Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings). 2022 Conference
    ArXiVPDFACL Anthology, BibTex ]
    @inproceedings{structdiversity:femnlp22,
      author = {Shivanshu Gupta and Sameer Singh and Matt Gardner},
      title = {Structurally Diverse Sampling for Sample-Efficient Training and Comprehensive Evaluation},
      booktitle = {Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings)},
      doi = {10.18653/v1/2022.findings-emnlp.365},
      pages = {4966--4979},
      year = {2022}
    }
  • Kolby NottinghamAlekhya PylaSameer SinghRoy Fox.Learning to Query Internet Text for Informing Reinforcement Learning Agents. Reinforcement Learning and Decision Making (RLDM). 2022 Conference
    Extended abstract
    PDFArXiV, Abstract, BibTex ]
    Generalization to out of distribution tasks in reinforcement learning is a challenging problem. One successful approach improves generalization by conditioning policies on task or environment descriptions that provide information about the current transition or reward functions. Previously, these descriptions were often expressed as generated or crowd sourced text. In this work, we begin to tackle the problem of extracting useful information from natural language found in the wild (e.g. internet forums, documentation, and wikis). These natural, pre-existing sources are especially challenging, noisy, and large and present novel challenges compared to previous approaches. We propose to address these challenges by training reinforcement learning agents to learn to query these sources as a human would, and we experiment with how and when an agent should query. To address the how, we demonstrate that pretrained QA models perform well at executing zero-shot queries in our target domain. Using information retrieved by a QA model, we train an agent to learn when it should execute queries. We show that our method correctly learns to execute queries to maximize reward in a reinforcement learning setting.
    @inproceedings{queryrl:rldm22,
      author = {Kolby Nottingham and Alekhya Pyla and Sameer Singh and Roy Fox},
      title = {Learning to Query Internet Text for Informing Reinforcement Learning Agents},
      booktitle = {Reinforcement Learning and Decision Making (RLDM)},
      year = {2022}
    }
  • Robert L. Logan IVAlexandre PassosSameer SinghMing-Wei Chang.FRUIT: Faithfully Reflecting Updated Information in Text. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2022 Conference
    Best Task Paper Award
    PDFACL AnthologyArXiV, Abstract, BibTex ]
    Textual knowledge bases such as Wikipedia require considerable effort to keep up to date and consistent. While automated writing assistants could potentially ease this burden, the problem of suggesting edits grounded in external knowledge has been under-explored. In this paper, we introduce the novel generation task of *faithfully reflecting updated information in text* (FRUIT) where the goal is to update an existing article given new evidence. We release the FRUIT-WIKI dataset, a collection of over 170K distantly supervised data produced from pairs of Wikipedia snapshots, along with our data generation pipeline and a gold evaluation set of 914 instances whose edits are guaranteed to be supported by the evidence. We provide benchmark results for popular generation systems as well as EDIT5 – a T5-based approach tailored to editing we introduce that establishes the state of the art. Our analysis shows that developing models that can update articles faithfully requires new capabilities for neural generation models, and opens doors to many new applications.
    @inproceedings{fruit:naacl22,
      author = {Robert L. Logan IV and Alexandre Passos and Sameer Singh and Ming-Wei Chang},
      title = {{FRUIT}: Faithfully Reflecting Updated Information in Text},
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      doi = {10.18653/v1/2022.naacl-main.269},
      pages = {3670--3686},
      year = {2022}
    }
  • Sanjay SubramanianWilliam MerrillTrevor DarrellMatt GardnerSameer SinghAnna Rohrbach.ReCLIP: A Strong Zero-Shot Baseline for Referring Expression Comprehension . Association for Computational Linguistics (ACL). 2022 Conference
    PDFACL AnthologyCode, Abstract, BibTex ]
    Training a referring expression comprehension (ReC) model for a new visual domain requires collecting referring expressions, and potentially corresponding bounding boxes, for images in the domain. While large-scale pre-trained models are useful for image classification across domains, it remains unclear if they can be applied in a zero-shot manner to more complex tasks like ReC. We present ReCLIP, a simple but strong zero-shot baseline that repurposes CLIP, a state-of-the-art large-scale model, for ReC. Motivated by the close connection between ReC and CLIP’s contrastive pre-training objective, the first component of ReCLIP is a region-scoring method that isolates object proposals via cropping and blurring, and passes them to CLIP. However, through controlled experiments on a synthetic dataset, we find that CLIP is largely incapable of performing spatial reasoning off-the-shelf. We reduce the gap between zero-shot baselines from prior work and supervised models by as much as 29% on RefCOCOg, and on RefGTA (video game imagery), ReCLIP’s relative improvement over supervised ReC models trained on real images is 8%.
    @inproceedings{reclip:acl22,
      author = {Sanjay Subramanian and William Merrill and Trevor Darrell and Matt Gardner and Sameer Singh and Anna Rohrbach},
      title = {{ReCLIP}: A Strong Zero-Shot Baseline for Referring Expression Comprehension},
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/2022.acl-long.357},
      pages = {5198--5215},
      year = {2022}
    }
  • Pouya PezeshkpourSarthak JainSameer SinghByron Wallace.Combining Feature and Instance Attribution to Detect Artifacts. Findings of the Association for Computational Linguistics (ACL Findings). 2022 Conference
    PDFArXiVACL Anthology, Abstract, BibTex ]
    Training the deep neural networks that dominate NLP requires large datasets. These are often collected automatically or via crowdsourcing, and may exhibit systematic biases or annotation artifacts. By the latter we mean spurious correlations between inputs and outputs that do not represent a generally held causal relationship between features and classes; models that exploit such correlations may appear to perform a given task well, but fail on out of sample data. In this paper we evaluate use of different attribution methods for aiding identification of training data artifacts. We propose new hybrid approaches that combine saliency maps (which highlight "important" input features) with instance attribution methods (which retrieve training samples "influential" to a given prediction). We show that this proposed training-feature attribution can be used to efficiently uncover artifacts in training data when a challenging validation set is available. We also carry out a small user study to evaluate whether these methods are useful to NLP researchers in practice, with promising results.
    @inproceedings{tfa:facl22,
      author = {Pouya Pezeshkpour and Sarthak Jain and Sameer Singh and Byron Wallace},
      title = {Combining Feature and Instance Attribution to Detect Artifacts},
      booktitle = {Findings of the Association for Computational Linguistics (ACL Findings)},
      doi = {10.18653/v1/2022.findings-acl.153},
      pages = {1934--1946},
      year = {2022}
    }
  • Robert L. Logan IVIvana BalaževićEric WallaceFabio PetroniSameer SinghSebastian Riedel.Cutting Down on Prompts and Parameters: Simple Few-Shot Learning with Language Models. Findings of the Association for Computational Linguistics (ACL Findings). 2022 Conference
    Also presented at the Neurips workshop on Efficient Natural Language and Speech Processing (ENLSP)
    PDFArXiVACL AnthologyCode, Abstract, BibTex ]
    Prompting language models (LMs) with training examples and task descriptions has been seen as critical to recent successes in few-shot learning. In this work, we show that finetuning LMs in the few-shot setting can considerably reduce the need for prompt engineering. In fact, one can use null prompts, prompts that contain neither task-specific templates nor training examples, and achieve competitive accuracy to manually-tuned prompts across a wide range of tasks. While finetuning LMs does introduce new parameters for each downstream task, we show that this memory overhead can be substantially reduced: finetuning only the bias terms can achieve comparable or better accuracy than standard finetuning while only updating 0.1% of the parameters. All in all, we recommend finetuning LMs for few-shot learning as it is more accurate, robust to different prompts, and can be made nearly as efficient as using frozen LMs.
    @inproceedings{cutting:facl22,
      author = {Robert L. Logan IV and Ivana Bala{\v{z}}evi{\'c} and Eric Wallace and Fabio Petroni and Sameer Singh and Sebastian Riedel},
      title = {Cutting Down on Prompts and Parameters: Simple Few-Shot Learning with Language Models},
      booktitle = {Findings of the Association for Computational Linguistics (ACL Findings)},
      doi = {10.18653/v1/2022.findings-acl.222},
      pages = {2824--2835},
      year = {2022}
    }
  • Yoshitomo MatsubaraDavide CallegaroSameer SinghMarco LevoratoFrancesco Restuccia.BottleFit: Learning Compressed Representations in Deep Neural Networks for Effective and Efficient Split Computing. IEEE International Symposium on a World of Wireless, Mobile and Multimedia Networks (WoWMoM). 2022 Conference
    ArXiV PagePDF, Abstract, BibTex ]
    Although mission-critical applications require the use of deep neural networks (DNNs), their continuous execution at mobile devices results in a significant increase in energy consumption. While edge offloading can decrease energy consumption, erratic patterns in channel quality, network and edge server load can lead to severe disruption of the system’s key operations. An alternative approach, called split computing, generates compressed representations within the model (called "bottlenecks"), to reduce bandwidth usage and energy consumption. Prior work has proposed approaches that introduce additional layers, to the detriment of energy consumption and latency. For this reason, we propose a new framework called BottleFit, which, in addition to targeted DNN architecture modifications, includes a novel training strategy to achieve high accuracy even with strong compression rates. We apply BottleFit on cutting-edge DNN models in image classification, and show that BottleFit achieves 77.1% data compression with up to 0.6% accuracy loss on ImageNet dataset, while state of the art such as SPINN loses up to 6% in accuracy. We experimentally measure the power consumption and latency of an image classification application running on an NVIDIA Jetson Nano board (GPU-based) and a Raspberry PI board (GPU-less). We show that BottleFit decreases power consumption and latency respectively by up to 49% and 89% with respect to (w.r.t.) local computing and by 37% and 55% w.r.t. edge offloading. We also compare BottleFit with state-of-the-art autoencoders-based approaches, and show that (i) BottleFit reduces power consumption and execution time respectively by up to 54% and 44% on the Jetson and 40% and 62% on Raspberry PI; (ii) the size of the head model executed on the mobile device is 83 times smaller. We publish the code repository for reproducibility of the results in this study.
    @inproceedings{bottlefit:wowmom22,
      author = {Yoshitomo Matsubara and Davide Callegaro and Sameer Singh and Marco Levorato and Francesco Restuccia},
      title = { {BottleFit: Learning Compressed Representations in Deep Neural Networks for Effective and Efficient Split Computing} },
      booktitle = {IEEE International Symposium on a World of Wireless, Mobile and Multimedia Networks (WoWMoM)},
      doi = {10.1109/WoWMoM54355.2022.00032},
      pages = {337--346},
      year = {2022}
    }
  • Yasaman RazeghiRaja Sekhar Reddy MekalaRobert L. Logan IVMatt GardnerSameer Singh.Snoopy: An Online Interface for Exploring the Effect of Pretraining Term Frequencies on Few-Shot LM Performance. Demo at the Empirical Methods in Natural Language Processing (EMNLP). 2022 Demo
    DemoACL AnthologyPDF, BibTex ]
    @inproceedings{snoopy:emnlp22,
      author    = {Yasaman Razeghi and Raja Sekhar Reddy Mekala and Robert L. Logan IV and Matt Gardner and Sameer Singh},
      title     = { {Snoopy: An Online Interface for Exploring the Effect of Pretraining Term Frequencies on Few-Shot LM Performance} },
      booktitle = {Demo at the Empirical Methods in Natural Language Processing (EMNLP)},
      year      = {2022}
    }
  • Kareem AhmedTao LiThy TonQuan GuoKai-Wei ChangParisa KordjamshidiVivek SrikumarGuy Van den BroeckSameer Singh.PYLON: A PyTorch Framework for Learning with Constraints. Demo at the AAAI Conference on Artificial Intelligence (AAAI). 2022 Demo
    Also presented as a Demo paper at Neurips 2021.
    AAAI ProceedingsAAAI PDF (shorter)Neurips ProceedingsNeurIPS PDF (longer)WebsiteCodeVideo, Abstract, BibTex ]
    Deep learning excels at learning low-level task information from large amounts of data, but struggles with learning high-level domain knowledge, which can often be directly and succinctly expressed. In this work, we introduce Pylon, a neuro-symbolic training framework that builds on PyTorch to augment procedurally trained neural networks with declaratively specified knowledge. Pylon allows users to programmatically specify constraints as PyTorch functions, and compiles them into a differentiable loss, thus training predictive models that fit the data whilst satisfying the specified constraints. Pylon includes both exact as well as approximate compilers to efficiently compute the loss, employing fuzzy logic, sampling methods, and circuits, ensuring scalability even to complex models and constraints. A guiding principle in designing Pylon has been the ease with which any existing deep learning codebase can be extended to learn from constraints using only a few lines: a function expressing the constraint and a single line of code to compile it into a loss. We include case studies from natural language processing, computer vision, logical games, and knowledge graphs, that can be interactively trained, and highlights Pylon's usage.
    @inproceedings{pylon:aaai22,
      author = {Kareem Ahmed and Tao Li and Thy Ton and Quan Guo and Kai-Wei Chang and Parisa Kordjamshidi and Vivek Srikumar and Guy Van den Broeck and Sameer Singh},
      title = { {PYLON: A PyTorch Framework for Learning with Constraints} },
      booktitle = {Demo at the AAAI Conference on Artificial Intelligence (AAAI)},
      doi = {10.1609/aaai.v36i11.21711},
      pages = {13152--13154},
      year = {2022}
    }
  • Huwail J. AlantariImran S. CurrimYiting DengSameer Singh.An Empirical Comparison of Machine Learning Methods for Text-based Sentiment Analysis of Online Consumer Reviews. International Journal of Research in Marketing. 2022 Journal
    Journal, BibTex ]
    @article{sentiment:ijrm22,
      author = {Huwail J. Alantari and Imran S. Currim and Yiting Deng and Sameer Singh},
      title = { {An Empirical Comparison of Machine Learning Methods for Text-based Sentiment Analysis of Online Consumer Reviews} },
      journal = {International Journal of Research in Marketing},
      volume = {39},
      number = {1},
      doi = {10.1016/j.ijresmar.2021.10.011},
      pages = {1--19},
      year = {2022}
    }
  • Preethi SeshadriPouya PezeshkpourSameer Singh.Quantifying Social Biases Using Templates is Unreliable. TSRML Workshop @ NeurIPS. 2022 Workshop
    ArXiV, BibTex ]
    @inproceedings{templates:tsrml22,
      author    = {Preethi Seshadri and Pouya Pezeshkpour and Sameer Singh},
      title     = { {Quantifying Social Biases Using Templates is Unreliable} },
      booktitle = {TSRML Workshop @ NeurIPS},
      year      = {2022}
    }
  • Dylan SlackSatyapriya KrishnaHimabindu LakkarajuSameer Singh.TalkToModel: Explaining Machine Learning Models with Interactive Natural Language Conversations . TSRML Workshop @ NeurIPS. 2022 Workshop
    Honorable Mention for Best Paper
    ArXiVCodeDemo, BibTex ]
    @inproceedings{talktomodel:tsrml22,
      author = {Dylan Slack and Satyapriya Krishna and Himabindu Lakkaraju and Sameer Singh},
      title = { {TalkToModel: Explaining Machine Learning Models with Interactive Natural Language Conversations} },
      booktitle = {TSRML Workshop @ NeurIPS},
      year = {2022}
    }
  • Himabindu LakkarajuDylan SlackYuxin ChenChenhao TanSameer Singh.Rethinking Explainability as a Dialogue: A Practitioner's Perspective . HCAI Workshop @ NeurIPS. 2022 Workshop
    ArXiV, BibTex ]
    @inproceedings{rethinking:hcai22,
      author = {Himabindu Lakkaraju and Dylan Slack and Yuxin Chen and Chenhao Tan and Sameer Singh},
      title = { {Rethinking Explainability as a Dialogue: A Practitioner's Perspective} },
      booktitle = {HCAI Workshop @ NeurIPS},
      year = {2022}
    }
2021
  • Dylan SlackSophie HilgardHimabindu LakkarajuSameer Singh.Counterfactual Explanations Can Be Manipulated. Neural Information Processing Systems (NeurIPS). 2021 Conference
    PDFArXiVNeurIPS Page, BibTex ]
    @inproceedings{manipcfs:neurips21,
      author    = {Dylan Slack and Sophie Hilgard and Himabindu Lakkaraju and Sameer Singh},
      title     = { {Counterfactual Explanations Can Be Manipulated} },
      booktitle = {Neural Information Processing Systems (NeurIPS)},
      year      = {2021}
    }
  • Dylan SlackSophie HilgardSameer SinghHimabindu Lakkaraju.Reliable Post hoc Explanations Modeling Uncertainty in Explainability. Neural Information Processing Systems (NeurIPS). 2021 Conference
    NeurIPS PagePDFArXiV, BibTex ]
    @inproceedings{bayeslimeshap:neurips21,
      author    = {Dylan Slack and Sophie Hilgard and Sameer Singh and Himabindu Lakkaraju},
      title     = { {Reliable Post hoc Explanations Modeling Uncertainty in Explainability} },
      booktitle = {Neural Information Processing Systems (NeurIPS)},
      pages     = {9391--9404},
      year      = {2021}
    }
  • Dheeru DuaCicero Nogueira dos SantosPatrick NgBen AthiwaratkunBing XiangMatt GardnerSameer Singh.Generative Context Pair Selection for Multi-hop Question Answering. Empirical Methods in Natural Language Processing (EMNLP). 2021 Conference
    PDFArXiVACL Anthology, BibTex ]
    @inproceedings{genqa:emnlp21,
      author = {Dheeru Dua and Cicero Nogueira dos Santos and Patrick Ng and Ben Athiwaratkun and Bing Xiang and Matt Gardner and Sameer Singh},
      title = { {Generative Context Pair Selection for Multi-hop Question Answering} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/2021.emnlp-main.561},
      pages = {7009--7015},
      year = {2021}
    }
  • Shayne LongpreKartik PerisetlaAnthony ChenNikhil RameshChris DuBoisSameer Singh.Entity-Based Knowledge Conflicts in Question Answering. Empirical Methods in Natural Language Processing (EMNLP). 2021 Conference
    PDFArXiVProject PageSource CodeACL Anthology, BibTex ]
    @inproceedings{qaconflicts:emnlp21,
      author = {Shayne Longpre and Kartik Perisetla and Anthony Chen and Nikhil Ramesh and Chris DuBois and Sameer Singh},
      title = { {Entity-Based Knowledge Conflicts in Question Answering} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/2021.emnlp-main.565},
      pages = {7052--7063},
      year = {2021}
    }
  • Dheeru DuaPradeep DasigiSameer SinghMatt Gardner.Learning with Instance Bundles for Reading Comprehension. Empirical Methods in Natural Language Processing (EMNLP). 2021 Conference
    PDFArXiVACL Anthology, Abstract, BibTex ]
    When training most modern reading comprehension models, all the questions associated with a context are treated as being independent from each other. However, closely related questions and their corresponding answers are not independent, and leveraging these relationships could provide a strong supervision signal to a model. Drawing on ideas from contrastive estimation, we introduce several new supervision techniques that compare question-answer scores across multiple related instances. Specifically, we normalize these scores across various neighborhoods of closely contrasting questions and/or answers, adding another cross entropy loss term that is used in addition to traditional maximum likelihood estimation. Our techniques require bundles of related question-answer pairs, which we can either mine from within existing data or create using various automated heuristics. We empirically demonstrate the effectiveness of training with instance bundles on two datasets -- HotpotQA and ROPES -- showing up to 11% absolute gains in accuracy.
    @inproceedings{bundles:emnlp21,
      author = {Dheeru Dua and Pradeep Dasigi and Sameer Singh and Matt Gardner},
      title = { {Learning with Instance Bundles for Reading Comprehension} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/2021.emnlp-main.584},
      pages = {7347--7357},
      year = {2021}
    }
  • Matt GardnerWilliam MerrillJesse DodgeMatthew PetersAlexis RossSameer SinghNoah A. Smith.Competency Problems: On Finding and Removing Artifacts in Language Data. Empirical Methods in Natural Language Processing (EMNLP). 2021 Conference
    PDFArXiVACL Anthology, BibTex ]
    @inproceedings{competency:emnlp21,
      author = {Matt Gardner and William Merrill and Jesse Dodge and Matthew Peters and Alexis Ross and Sameer Singh and Noah A. Smith},
      title = { {Competency Problems: On Finding and Removing Artifacts in Language Data} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/2021.emnlp-main.135},
      pages = {1801--1813},
      year = {2021}
    }
  • Nitish GuptaSameer SinghMatt GardnerDan Roth.Paired Examples as Indirect Supervision in Latent Decision Models. Empirical Methods in Natural Language Processing (EMNLP). 2021 Conference
    PDFArXiVACL Anthology, BibTex ]
    @inproceedings{pairednmn:emnlp21,
      author = {Nitish Gupta and Sameer Singh and Matt Gardner and Dan Roth},
      title = { {Paired Examples as Indirect Supervision in Latent Decision Models} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/2021.emnlp-main.466},
      pages = {5774--5785},
      year = {2021}
    }
  • Tony Z. ZhaoEric WallaceShi FengDan KleinSameer Singh.Calibrate Before Use: Improving Few-shot Performance of Language Models. International Conference on Machine Learning (ICML). 2021 Conference
    PDFArXiVICML PageVideo/Slides, BibTex ]
    @inproceedings{poisoning:icml21,
      author = {Tony Z. Zhao and Eric Wallace and Shi Feng and Dan Klein and Sameer Singh},
      title = { {Calibrate Before Use: Improving Few-shot Performance of Language Models} },
      booktitle = {International Conference on Machine Learning (ICML)},
      pages = {12697--12706},
      year = {2021}
    }
  • Anthony ChenPallavi GudipatiShayne LongpreXiao LingSameer Singh.Evaluating Entity Disambiguation and the Role of Popularity in Retrieval-Based NLP. Association for Computational Linguistics (ACL). 2021 Conference
    ACL AnthologyPDF, BibTex ]
    @inproceedings{amber:acl21,
      author = {Anthony Chen and Pallavi Gudipati and Shayne Longpre and Xiao Ling and Sameer Singh},
      title = { {Evaluating Entity Disambiguation and the Role of Popularity in Retrieval-Based NLP} },
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/2021.acl-long.345},
      pages = {4472--4485},
      year = {2021}
    }
  • Robert L. Logan IVAndrew McCallumSameer SinghDan Bikel.Benchmarking Scalable Methods for Streaming Cross Document Coreference. Association for Computational Linguistics (ACL). 2021 Conference
    ACL AnthologyPDF, BibTex ]
    @inproceedings{streamingcdcr:acl21,
      author = {Robert L. Logan IV and Andrew McCallum and Sameer Singh and Dan Bikel},
      title = { {Benchmarking Scalable Methods for Streaming Cross Document Coreference} },
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/2021.acl-long.364},
      pages = {4717--4731},
      year = {2021}
    }
  • Nitish GuptaSameer SinghMatt Gardner.Enforcing Consistency in Weakly Supervised Semantic Parsing. Association for Computational Linguistics (ACL). 2021 Conference
    ACL AnthologyPDF, BibTex ]
    @inproceedings{spconsistency:acl21,
      author = {Nitish Gupta and Sameer Singh and Matt Gardner},
      title = { {Enforcing Consistency in Weakly Supervised Semantic Parsing} },
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/2021.acl-short.22},
      pages = {168--174},
      year = {2021}
    }
  • Pouya PezeshkpourSarthak JainByron WallaceSameer Singh.An Empirical Comparison of Instance Attribution Methods for NLP. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2021 Conference
    PDFArXiVACL Anthology, Abstract, BibTex ]
    Widespread adoption of deep pretrained (masked) neural language models has motivated a pressing need for approaches for interpreting network outputs and for facilitating model debugging. Instance attribution methods constitute one means of accomplishing these goals by retrieving training instances that (may have) led to a particular prediction. Influence functions (IF) provide machinery for doing this by quantifying the effect that perturbing individual train instances would have on a specific test prediction. However, even approximating the IF is computationally expensive, to a degree that may be prohibitive in many cases. Might simpler approaches (e.g., retrieving train instance most similar to a given test point) perform comparably? In this work we evaluate the degree to which different potential instance attribution agree with respect to the importance of training samples. We find that simple retrieval methods yield training instances that differ from those identified via gradient-based methods (such as the IF), but that nonetheless exhibit desirable characteristics similar to more complex attribution methods.
    @inproceedings{emp-instance:naacl21,
      author = {Pouya Pezeshkpour and Sarthak Jain and Byron Wallace and Sameer Singh},
      title = { {An Empirical Comparison of Instance Attribution Methods for NLP} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      doi = {10.18653/v1/2021.naacl-main.75},
      pages = {967--975},
      year = {2021}
    }
  • Eric WallaceTony Z. ZhaoShi FengSameer Singh.Concealed Data Poisoning Attacks on NLP Models. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2021 Conference
    PDFArXiVACL AnthologyWebsiteCode, Abstract, BibTex ]
    Adversarial attacks alter NLP model predictions by perturbing test-time inputs. However, it is much less understood whether, and how, predictions can be manipulated with small, concealed changes to the training data. In this work, we develop a new data poisoning attack that allows an adversary to control model predictions whenever a desired trigger phrase is present in the input. For instance, we insert 50 poison examples into a sentiment model’s training set that causes the model to frequently predict Positive whenever the input contains “James Bond”. Crucially, we craft these poison examples using a gradient-based procedure so that they do not mention the trigger phrase. We also apply our poison attack to language modeling (“Apple iPhone” triggers negative generations) and machine translation (“iced coffee” mistranslated as “hot coffee”). We conclude by proposing three defenses that can mitigate our attack at some cost in prediction accuracy or extra human annotation.
    @inproceedings{poisoning:naacl21,
      author = {Eric Wallace and Tony Z. Zhao and Shi Feng and Sameer Singh},
      title = { {Concealed Data Poisoning Attacks on NLP Models} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      doi = {10.18653/v1/2021.naacl-main.13},
      pages = {139--150},
      year = {2021}
    }
  • Zhengli ZhaoSameer SinghHonglak LeeZizhao ZhangAugustus OdenaHan Zhang.Improved Consistency Regularization for GANs. AAAI Conference on Artificial Intelligence (AAAI). 2021 Conference
    PDFArXiVAAAI Page, Abstract, BibTex ]
    Recent work has increased the performance of Generative Adversarial Networks (GANs) by enforcing a consistency cost on the discriminator. We improve on this technique in several ways. We first show that consistency regularization can introduce artifacts into the GAN samples and explain how to fix this issue. We then propose several modifications to the consistency regularization procedure designed to improve its performance. We carry out extensive experiments quantifying the benefit of our improvements. For unconditional image synthesis on CIFAR-10 and CelebA, our modifications yield the best known FID scores on various GAN architectures. For conditional image synthesis on CIFAR-10, we improve the state-of-the-art FID score from 11.48 to 9.21. Finally, on ImageNet-2012, we apply our technique to the original BigGAN model and improve the FID from 6.66 to 5.38, which is the best score at that model size.
    @inproceedings{icrgan:aaai21,
      author = {Zhengli Zhao and Sameer Singh and Honglak Lee and Zizhao Zhang and Augustus Odena and Han Zhang},
      title = { {Improved Consistency Regularization for GANs} },
      booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
      doi = {10.1609/aaai.v35i12.17317},
      pages = {11033--11041},
      year = {2021}
    }
  • Matthew K LaffinCharles ZenderSameer SinghJ. Van WessemC. J. P. P. SmeetsC. H. Reijmer.Climatology and Evolution of the Antarctic Peninsula Föhn Wind‐Induced Melt Regime From 1979–2018. Journal of Geophysical Research: Atmospheres. 2021 Journal
    Journal, BibTex ]
    @article{fohn:jgr21,
      author = {Matthew K Laffin and Charles Zender and Sameer Singh and J. Van Wessem and C. J. P. P. Smeets and C. H. Reijmer},
      title = { {Climatology and Evolution of the Antarctic Peninsula F{\"o}hn Wind-Induced Melt Regime From 1979--2018} },
      journal = {Journal of Geophysical Research: Atmospheres},
      volume = {126},
      number = {4},
      doi = {10.1029/2020JD033682},
      year = {2021}
    }
  • Robert L. Logan IVIvana BalaževićEric WallaceFabio PetroniSameer SinghSebastian Riedel.Cutting Down on Prompts and Parameters: Simple Few-Shot Learning with Language Models. NeurIPS Workshop on Efficient Natural Language and Speech Processing (ENLSP). 2021 Workshop
    Best Poster Award
    ArXiVPDFCode, BibTex ]
    @inproceedings{nullprompts:effnlp21,
      author = {Robert L. Logan IV and Ivana Bala{\v{z}}evi{\'c} and Eric Wallace and Fabio Petroni and Sameer Singh and Sebastian Riedel},
      title = { {Cutting Down on Prompts and Parameters: Simple Few-Shot Learning with Language Models} },
      booktitle = {NeurIPS Workshop on Efficient Natural Language and Speech Processing (ENLSP)},
      year = {2021}
    }
    }
  • Kolby NottinghamLitian LiangDaeyun ShinCharless C. FowlkesRoy FoxSameer Singh.Modular Framework for Visuomotor Language Grounding. Embodied AI Workshop at CVPR. 2021 Workshop
    PDF, BibTex ]
    @inproceedings{modulargl:embodied21,
      author    = {Kolby Nottingham and Litian Liang and Daeyun Shin and Charless C. Fowlkes and Roy Fox and Sameer Singh},
      title     = { {Modular Framework for Visuomotor Language Grounding} },
      booktitle = {Embodied AI Workshop at CVPR},
      year      = {2021}
    }
  • Yasaman RazeghiRobert L. Logan IVSameer Singh.Deriving Behavioral Tests from Common Sense Knowledge Graphs. AAAI Workshop on Common Sense Knowledge Graphs (CSKGs). 2021 Workshop
    PDF, Abstract, BibTex ]
    Although NLP models have demonstrated “superhuman” performance on common sense reasoning tasks, it is unclear whether these models truly have common sense knowledge. Constructing evaluation datasets to test this knowledge is expensive due to the manual effort involved, and is also limited in scope. Meanwhile, common sense knowledge graphs (CSKGs) aim for a wide coverage of structured common sense knowledge, but can not be directly used for testing purposes. In this work, we introduce a semi-automated approach that leverages CSKGs to construct out-of-domain evaluation sets for NLP tasks that are more scalable than purely manual approaches. Using this procedure, we create test cases from two popular CSKGs—ConceptNet and ATOMIC—to test the common sense reasoning capability of models trained for natural language inference (NLI) and question answering (QA). These tests reveal interesting differences in failure modes of these models; models trained on NLI tend to perform better on tests of ontological knowledge, e.g. ’is a’ and ’used for’ relations, failing on tests that require understanding ’desires’, ’needs’, and ’wants’, while QA models perform better on tests that involve ’wants’, and ’desires’.
    @inproceedings{cskgtests:cskg21,
      author    = {Yasaman Razeghi and Robert L. Logan IV and Sameer Singh},
      title     = { {Deriving Behavioral Tests from Common Sense Knowledge Graphs} },
      booktitle = {AAAI Workshop on Common Sense Knowledge Graphs (CSKGs)},
      year      = {2021}
    }
  • Zhouhang XieJonathan BrophyAdam NoackWencong YouKalyani AsthanaCarter PerkinsSabrina ReisZayd HammoudehDaniel LowdSameer Singh.What Models Know About Their Attackers: Deriving Attacker Information From Latent Representations. EMNLP Workshop on Analyzing and Interpreting Neural Networks for NLP (BlackBoxNLP). 2021 Workshop
    PDFACL Anthology, Abstract, BibTex ]
    Adversarial attacks curated against NLP models are increasingly becoming practical threats. Although various methods have been developed to detect adversarial attacks, securing learning-based NLP systems in practice would require more than identifying and evading perturbed instances. To address these issues, we propose a new set of adversary identification tasks, Attacker Attribute Classification via Textual Analysis (AACTA), that attempts to obtain more detailed information about the attackers from adversarial texts. Specifically, given a piece of adversarial text, we hope to accomplish tasks such as localizing perturbed tokens, identifying the attacker’s access level to the target model, determining the evasion mechanism imposed, and specifying the perturbation type employed by the attacking algorithm. Our contributions are as follows: we formalize the task of classifying attacker attributes, and create a benchmark on various target models from sentiment classification and abuse detection domains. We show that signals from BERT models and target models can be used to train classifiers that reveal the properties of the attacking algorithms. We demonstrate that adversarial attacks leave interpretable traces in both feature spaces of pre-trained language models and target models, making AACTA a promising direction towards more trustworthy NLP systems.
    @inproceedings{advdetect:bbox21,
      author    = {Zhouhang Xie and Jonathan Brophy and Adam Noack and Wencong You and Kalyani Asthana and Carter Perkins and Sabrina Reis and Zayd Hammoudeh and Daniel Lowd and Sameer Singh},
      title     = { {What Models Know About Their Attackers: Deriving Attacker Information From Latent Representations} },
      booktitle = {EMNLP Workshop on Analyzing and Interpreting Neural Networks for NLP (BlackBoxNLP)},
      year      = {2021}
    }
2020
  • Taylor ShinYasaman RazeghiRobert L. Logan IVEric WallaceSameer Singh.AutoPrompt: Eliciting Knowledge from Language Models with Automatically Generated Prompts . Empirical Methods in Natural Language Processing (EMNLP). 2020 Conference
    PDFWebsiteACL Anthology, Abstract, BibTex ]
    The remarkable success of pretrained language models has motivated the study of what kinds of knowledge these models learn during pretraining. Reformulating tasks as fill-in-the-blanks problems (e.g., cloze tests) is a natural approach for gauging such knowledge, however, its usage is limited by the manual effort and guesswork required to write suitable prompts. To address this, we develop AutoPrompt, an automated method to create prompts for a diverse set of tasks, based on a gradient-guided search. Using AutoPrompt, we show that masked language models (MLMs) have an inherent capability to perform sentiment analysis and natural language inference without additional parameters or finetuning, sometimes achieving performance on par with recent state-of-the-art supervised models. We also show that our prompts elicit more accurate factual knowledge from MLMs than the manually created prompts on the LAMA benchmark, and that MLMs can be used as relation extractors more effectively than supervised relation extraction models. These results demonstrate that automatically generated prompts are a viable parameter-free alternative to existing probing methods, and as pretrained LMs become more sophisticated and capable, potentially a replacement for finetuning.
    @inproceedings{autoprompt:emnlp20,
      author = {Taylor Shin and Yasaman Razeghi and Robert L. Logan IV and Eric Wallace and Sameer Singh},
      title = { {AutoPrompt: Eliciting Knowledge from Language Models with Automatically Generated Prompts} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      pages = {4222--4235},
      year = {2020}
    }
  • Anthony ChenGabriel StanovskySameer SinghMatt Gardner.MOCHA: A Dataset for Training and Evaluating Generative Reading Comprehension Metrics. Empirical Methods in Natural Language Processing (EMNLP). 2020 Conference
    PDFWebsiteACL Anthology, Abstract, BibTex ]
    Posing reading comprehension as a generation problem provides a great deal of flexibility, allowing for open-ended questions with few restrictions on possible answers. However, progress is impeded by existing generation metrics, which rely on token overlap and are agnostic to the nuances of reading comprehension. To address this, we introduce a benchmark for training and evaluating generative reading comprehension metrics: MOdeling Correctness with Human Annotations. MOCHA contains 40K human judgement scores on model outputs from 6 diverse question answering datasets and an additional set of minimal pairs for evaluation. Using MOCHA, we train a Learned Evaluation metric for Reading Comprehension, LERC, to mimic human judgement scores. LERC outperforms baseline metrics by 10 to 36 absolute Pearson points on held-out annotations. When we evaluate robustness on minimal pairs, LERC achieves 80% accuracy, outperforming baselines by 14 to 26 absolute percentage points while leaving significant room for improvement. MOCHA presents a challenging problem for developing accurate and robust generative reading comprehension metrics.
    @inproceedings{mocha:emnlp20,
      author = {Anthony Chen and Gabriel Stanovsky and Sameer Singh and Matt Gardner},
      title = { {MOCHA: A Dataset for Training and Evaluating Generative Reading Comprehension Metrics} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      pages = {6521--6532},
      year = {2020}
    }
  • Junlin WangJens TuylsEric WallaceSameer Singh.Gradient-based Analysis of NLP Models is Manipulable. Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings). 2020 Conference
    PDFWebsite, BibTex ]
    @inproceedings{facade:femnlp20,
      author = {Junlin Wang and Jens Tuyls and Eric Wallace and Sameer Singh},
      title = { {Gradient-based Analysis of NLP Models is Manipulable} },
      booktitle = {Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings)},
      pages = {247--258},
      year = {2020}
    }
  • Matt GardnerYoav ArtziVictoria BasmovJonathan BerantBen BoginSihao ChenPradeep DasigiDheeru DuaYanai ElazarAnanth GottumukkalaNitish GuptaHannaneh HajishirziGabriel IlharcoDaniel KhashabiKevin LinJiangming LiuNelson F. LiuPhoebe MulcaireQiang NingSameer SinghNoah A. SmithSanjay SubramanianReut TsarfatyEric WallaceAlly ZhangBen Zhou.Evaluating Models’ Local Decision Boundaries via Contrast Sets. Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings). 2020 Conference
    PDF, BibTex ]
    @inproceedings{contrast:femnlp20,
      author = {Matt Gardner and Yoav Artzi and Victoria Basmov and Jonathan Berant and Ben Bogin and Sihao Chen and Pradeep Dasigi and Dheeru Dua and Yanai Elazar and Ananth Gottumukkala and Nitish Gupta and Hannaneh Hajishirzi and Gabriel Ilharco and Daniel Khashabi and Kevin Lin and Jiangming Liu and Nelson F. Liu and Phoebe Mulcaire and Qiang Ning and Sameer Singh and Noah A. Smith and Sanjay Subramanian and Reut Tsarfaty and Eric Wallace and Ally Zhang and Ben Zhou},
      title = { {Evaluating Models' Local Decision Boundaries via Contrast Sets} },
      booktitle = {Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings)},
      pages = {1307--1323},
      year = {2020}
    }
  • Sanjay SubramanianLucy Lu WangBen BoginSachin MehtaMadeleine van ZuylenSravanthi ParasaSameer SinghMatt GardnerHannaneh Hajishirzi.MedICaT: A Dataset of Medical Images, Captions, and Textual References. Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings). 2020 Conference
    PDF, BibTex ]
    @inproceedings{medicat:femnlp20,
      author = {Sanjay Subramanian and Lucy Lu Wang and Ben Bogin and Sachin Mehta and Madeleine van Zuylen and Sravanthi Parasa and Sameer Singh and Matt Gardner and Hannaneh Hajishirzi},
      title = { {MedICaT: A Dataset of Medical Images, Captions, and Textual References} },
      booktitle = {Findings of the Association for Computational Linguistics: EMNLP (EMNLP Findings)},
      pages = {2112--2120},
      year = {2020}
    }
  • Marco Tulio RibeiroTongshuang WuCarlos GuestrinSameer Singh.Beyond Accuracy: Behavioral Testing of NLP models with CheckList. Association for Computational Linguistics (ACL). 2020 Conference
    Best Paper Award
    PDFCodeACL AnthologyVideo+SlidesArXiV, Abstract, BibTex ]
    Although measuring held-out accuracy has been the primary approach to evaluate generalization, it often overestimates the performance of NLP models, while alternative approaches for evaluating models either focus on individual tasks or on specific behaviors. Inspired by principles of behavioral testing in software engineering, we introduce CheckList, a task-agnostic methodology for testing NLP models. CheckList includes a matrix of general linguistic capabilities and test types that facilitate comprehensive test ideation, as well as a software tool to generate a large and diverse number of test cases quickly. We illustrate the utility of CheckList with tests for three tasks, identifying critical failures in both commercial and state-of-art models. In a user study, a team responsible for a commercial sentiment analysis model found new and actionable bugs in an extensively tested model. In another user study, NLP practitioners with CheckList created twice as many tests, and found almost three times as many bugs as users without it.
    @inproceedings{checklist:acl20,
      author = {Marco Tulio Ribeiro and Tongshuang Wu and Carlos Guestrin and Sameer Singh},
      title = { {Beyond Accuracy: Behavioral Testing of NLP models with CheckList} },
      booktitle = {Association for Computational Linguistics (ACL)},
      pages = {4902--4912},
      year = {2020}
    }
  • Robert L. Logan IV, Matt Gardner, Sameer Singh. On Importance Sampling-Based Evaluation of Latent Language Models. Association for Computational Linguistics (ACL). 2020 Conference
    [ PDF, ACL Anthology, Video+Slides, Abstract, BibTex ]
    Language models that use additional latent structures (e.g., syntax trees, coreference chains, knowledge graph links) provide several advantages over traditional language models. However, likelihood-based evaluation of these models is often intractable as it requires marginalizing over the latent space. Existing works avoid this issue by using importance sampling. Although this approach has asymptotic guarantees, analysis is rarely conducted on the effect of decisions such as sample size and choice of proposal distribution on the reported estimates. In this paper, we carry out this analysis for three models: RNNG, EntityNLM, and KGLM. In addition, we elucidate subtle differences in how importance sampling is applied in these works that can have substantial effects on the final estimates, as well as provide theoretical results which reinforce the validity of this technique.
    @inproceedings{impsample:acl20,
      author = {Robert L. Logan IV and Matt Gardner and Sameer Singh},
      title = { {On Importance Sampling-Based Evaluation of Latent Language Models} },
      booktitle = {Association for Computational Linguistics (ACL)},
      pages = {2171--2176},
      year = {2020}
    }
  • Sanjay Subramanian, Ben Bogin, Nitish Gupta, Tomer Wolfson, Sameer Singh, Jonathan Berant, Matt Gardner. Obtaining Faithful Interpretations from Compositional Neural Networks. Association for Computational Linguistics (ACL). 2020 Conference
    [ PDF, ACL Anthology, ArXiV, Video+Slides, Abstract, BibTex ]
    Neural module networks (NMNs) are a popular approach for modeling compositionality: they achieve high accuracy when applied to problems in language and vision, while reflecting the compositional structure of the problem in the network architecture. However, prior work implicitly assumed that the structure of the network modules, describing the abstract reasoning process, provides a faithful explanation of the model’s reasoning; that is, that all modules perform their intended behaviour. In this work, we propose and conduct a systematic evaluation of the intermediate outputs of NMNs on NLVR2 and DROP, two datasets which require composing multiple reasoning steps. We find that the intermediate outputs differ from the expected output, illustrating that the network structure does not provide a faithful explanation of model behaviour. To remedy that, we train the model with auxiliary supervision and propose particular choices for module architecture that yield much better faithfulness, at a minimal cost to accuracy.
    @inproceedings{nmninterpret:acl20,
      author = {Sanjay Subramanian and Ben Bogin and Nitish Gupta and Tomer Wolfson and Sameer Singh and Jonathan Berant and Matt Gardner},
      title = { {Obtaining Faithful Interpretations from Compositional Neural Networks} },
      booktitle = {Association for Computational Linguistics (ACL)},
      pages = {5594--5608},
      year = {2020}
    }
  • Dheeru Dua, Sameer Singh, Matt Gardner. Benefits of Intermediate Annotations in Reading Comprehension. Association for Computational Linguistics (ACL). 2020 Conference
    [ PDF, ACL Anthology, Video+Slides, Abstract, BibTex ]
    Complex compositional reading comprehension datasets require performing latent sequential decisions that are learned via supervision from the final answer. A large combinatorial space of possible decision paths that result in the same answer, compounded by the lack of intermediate supervision to help choose the right path, makes the learning particularly hard for this task. In this work, we study the benefits of collecting intermediate reasoning supervision along with the answer during data collection. We find that these intermediate annotations can provide two-fold benefits. First, we observe that for any collection budget, spending a fraction of it on intermediate annotations results in improved model performance, for two complex compositional datasets: DROP and Quoref. Second, these annotations encourage the model to learn the correct latent reasoning steps, helping combat some of the biases introduced during the data collection process.
    @inproceedings{intannot:acl20,
      author = {Dheeru Dua and Sameer Singh and Matt Gardner},
      title = { {Benefits of Intermediate Annotations in Reading Comprehension} },
      booktitle = {Association for Computational Linguistics (ACL)},
      pages = {5627--5634},
      year = {2020}
    }
  • Ananth Gottumukkala, Dheeru Dua, Sameer Singh, Matt Gardner. Dynamic Sampling Strategies for Multi-Task Reading Comprehension. Association for Computational Linguistics (ACL). 2020 Conference
    [ PDF, ACL Anthology, Video+Slides, Abstract, BibTex ]
    Building general reading comprehension systems, capable of solving multiple datasets at the same time, is a recent aspirational goal in the research community. Prior work has focused on model architecture or generalization to held out datasets, and largely passed over the particulars of the multi-task learning set up. We show that a simple dynamic sampling strategy, selecting instances for training proportional to the multi-task model’s current performance on a dataset relative to its single task performance, gives substantive gains over prior multi-task sampling strategies, mitigating the catastrophic forgetting that is common in multi-task learning. We also demonstrate that allowing instances of different tasks to be interleaved as much as possible between each epoch and batch has a clear benefit in multitask performance over forcing task homogeneity at the epoch or batch level. Our final model shows greatly increased performance over the best model on ORB, a recently-released multitask reading comprehension benchmark.
    @inproceedings{dynsample:acl20,
      author = {Ananth Gottumukkala and Dheeru Dua and Sameer Singh and Matt Gardner},
      title = { {Dynamic Sampling Strategies for Multi-Task Reading Comprehension} },
      booktitle = {Association for Computational Linguistics (ACL)},
      pages = {920--924},
      year = {2020}
    }
  • Pouya Pezeshkpour, Yifan Tian, Sameer Singh. Revisiting Evaluation of Knowledge Base Completion Models. Automated Knowledge Base Construction (AKBC). 2020 Conference
    Runner-up for Best Paper Award
    [ PDF, Yago3-TC Data, Video+Slides, OpenReview, AKBC Page, Abstract, BibTex ]
    Representing knowledge graphs (KGs) by learning embeddings for entities and relations has led to accurate models for existing KG completion benchmarks. However, due to the open-world assumption of existing KGs, evaluation of KG completion uses ranking metrics and triple classification with negative samples, and is thus unable to directly assess models on the goals of the task: completion. In this paper, we first study the shortcomings of these evaluation metrics. Specifically, we demonstrate that these metrics (1) are unreliable for estimating how calibrated the models are, (2) make strong assumptions that are often violated, and (3) do not sufficiently, and consistently, differentiate embedding methods from each other, or from simpler approaches. To address these issues, we gather a semi-complete KG referred to as YAGO3-TC, using a random subgraph from the test and validation data of YAGO3-10, which enables us to compute accurate triple classification accuracy on this data. Conducting thorough experiments on existing models, we provide new insights and directions for the KG completion research. Along with the dataset and the open source implementation of the models, we also provide a leaderboard for knowledge graph completion that consists of a hidden, and growing, test set, available at https://pouyapez.github.io/yago3-tc/.
    @inproceedings{kbeval:akbc20,
      author    = {Pouya Pezeshkpour and Yifan Tian and Sameer Singh},
      title     = { {Revisiting Evaluation of Knowledge Base Completion Models} },
      booktitle = {Automated Knowledge Base Construction (AKBC)},
      year      = {2020}
    }
  • Dan Barsever, Sameer Singh, Emre Neftci. Building a Better Lie Detector with BERT: The Difference Between Truth and Lies. International Joint Conference on Neural Networks (IJCNN). 2020 Conference
    [ PDF, BibTex ]
    @inproceedings{bertdecept:ijcnn20,
      author    = {Dan Barsever and Sameer Singh and Emre Neftci},
      title     = { {Building a Better Lie Detector with BERT: The Difference Between Truth and Lies} },
      booktitle = {International Joint Conference on Neural Networks (IJCNN)},
      year      = {2020}
    }
  • Nitish Gupta, Kevin Lin, Dan Roth, Sameer Singh, Matt Gardner. Neural Module Networks for Reasoning over Text. International Conference on Learning Representations (ICLR). 2020 Conference
    [ PDF, arXiv, OpenReview, Code, Abstract, BibTex ]
    Answering compositional questions that require multiple steps of reasoning against text is challenging, especially when they involve discrete, symbolic operations. Neural module networks (NMNs) learn to parse such questions as executable programs composed of learnable modules, performing well on synthetic visual QA domains. However, we find that it is challenging to learn these models for non-synthetic questions on open-domain text, where a model needs to deal with the diversity of natural language and perform a broader range of reasoning. We extend NMNs by: (a) introducing modules that reason over a paragraph of text, performing symbolic reasoning (such as arithmetic, sorting, counting) over numbers and dates in a probabilistic and differentiable manner; and (b) proposing an unsupervised auxiliary loss to help extract arguments associated with the events in text. Additionally, we show that a limited amount of heuristically-obtained question program and intermediate module output supervision provides sufficient inductive bias for accurate learning. Our proposed model significantly outperforms state-of-the-art models on a subset of the DROP dataset that poses a variety of reasoning challenges that are covered by our modules.
    @inproceedings{nmn:iclr20,
      author    = {Nitish Gupta and Kevin Lin and Dan Roth and Sameer Singh and Matt Gardner},
      title     = { {Neural Module Networks for Reasoning over Text} },
      booktitle = {International Conference on Learning Representations (ICLR)},
      year      = {2020}
    }
  • Piyush Gupta, Nikaash Puri, Sukriti Verma, Dhruv Kayastha, Shripad Deshmukh, Balaji Krishnamurthy, Sameer Singh. Explain Your Move: Understanding Agent Actions Using Specific and Relevant Feature Attribution. International Conference on Learning Representations (ICLR). 2020 Conference
    [ PDF, Project page, arXiv, Code+Data, OpenReview, Abstract, BibTex ]
    As deep reinforcement learning (RL) is applied to more tasks, there is a need to visualize and understand the behavior of learned agents. Saliency maps explain agent behavior by highlighting the features of the input state that are most relevant for the agent in taking an action. Existing perturbation-based approaches to compute saliency often highlight regions of the input that are not relevant to the action taken by the agent. Our proposed approach, SARFA (Specific and Relevant Feature Attribution), generates more focused saliency maps by balancing two aspects (specificity and relevance) that capture different desiderata of saliency. The first captures the impact of perturbation on the relative expected reward of the action to be explained. The second downweighs irrelevant features that alter the relative expected rewards of actions other than the action to be explained. We compare SARFA with existing approaches on agents trained to play board games (Chess and Go) and Atari games (Breakout, Pong and Space Invaders). We show through illustrative examples (Chess, Atari, Go), human studies (Chess), and automated evaluation methods (Chess) that SARFA generates saliency maps that are more interpretable for humans than existing approaches. For the code release and demo videos, see: https://nikaashpuri.github.io/sarfa-saliency/.
    @inproceedings{salrl:iclr20,
      author    = {Piyush Gupta and Nikaash Puri and Sukriti Verma and Dhruv Kayastha and Shripad Deshmukh and Balaji Krishnamurthy and Sameer Singh},
      title     = { {Explain Your Move: Understanding Agent Actions Using Specific and Relevant Feature Attribution} },
      booktitle = {International Conference on Learning Representations (ICLR)},
      year      = {2020}
    }
  • Sameer Singh. Minecraft as a Platform for Project-Based Learning in AI. AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI). 2020 Conference
    [ PDF, Website, Poster, Spotlight, AAAI Page, Abstract, BibTex ]
    Undergraduate courses that focus on open-ended, projectbased learning teach students how to define concrete goals, transfer conceptual understanding of algorithms to code, and evaluate/analyze/present their solution. However, AI, along with machine learning, is getting increasingly varied in terms of both the approaches and applications, making it challenging to design project courses that span a sufficiently wide spectrum of AI. For these reasons, existing AI project courses are restricted to a narrow set of approaches (e.g. only reinforcement learning) or applications (e.g. only computer vision).
    In this paper, we propose to use Minecraft as the platform for teaching AI via project-based learning. Minecraft is an open-world sandbox game with elements of exploration, resource gathering, crafting, construction, and combat, and is supported by the Malmo library that provides a programmatic interface to the player observations and actions at various levels of granularity. In Minecraft, students can design projects to use approaches like search-based AI, reinforcement learning, supervised learning, and constraint satisfaction, on data types like text, audio, images, and tabular data. We describe our experience with an open-ended, undergraduate AI projects course using Minecraft that includes 82 different projects, covering themes that ranged from navigation, instruction following, object detection, combat, and music/image generation.
    @inproceedings{malmo:eaai20,
      author = {Sameer Singh},
      title = { {Minecraft as a Platform for Project-Based Learning in AI} },
      booktitle = {AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI)},
      doi = {10.1609/aaai.v34i09.7070},
      pages = {13504--13505},
      year = {2020}
    }
  • Dylan Slack, Sophie Hilgard, Emily Jia, Sameer Singh, Himabindu Lakkaraju. Fooling LIME and SHAP: Adversarial Attacks on Post hoc Explanation Methods. AAAI/ACM Conference on AI, Ethics, and Society (AIES). 2020 Conference
    [ PDF, arXiv, ACM Page, Abstract, BibTex ]
    As machine learning black boxes are increasingly being deployed in domains such as healthcare and criminal justice, there is growing emphasis on building tools and techniques for explaining these black boxes in an interpretable manner. Such explanations are being leveraged by domain experts to diagnose systematic errors and underlying biases of black boxes. In this paper, we demonstrate that post hoc explanations techniques that rely on input perturbations, such as LIME and SHAP, are not reliable. Specifically, we propose a novel scaffolding technique that effectively hides the biases of any given classifier by allowing an adversarial entity to craft an arbitrary desired explanation. Our approach can be used to scaffold any biased classifier in such a way that its predictions on the input data distribution still remain biased, but the post hoc explanations of the scaffolded classifier look innocuous. Using extensive evaluation with multiple real-world datasets (including COMPAS), we demonstrate how extremely biased (racist) classifiers crafted by our framework can easily fool popular explanation techniques such as LIME and SHAP into generating innocuous explanations which do not reflect the underlying biases.
    @inproceedings{advlime:aies20,
      author = {Dylan Slack and Sophie Hilgard and Emily Jia and Sameer Singh and Himabindu Lakkaraju},
      title = { {Fooling LIME and SHAP: Adversarial Attacks on Post hoc Explanation Methods} },
      booktitle = {AAAI/ACM Conference on AI, Ethics, and Society (AIES)},
      doi = {10.1145/3375627.3375830},
      pages = {180--186},
      year = {2020}
    }
  • Yoshitomo Matsubara, Davide Callegaro, Sabur Baidya, Marco Levorato, Sameer Singh. Head Network Distillation: Splitting Distilled Deep Neural Networks for Resource-Constrained Edge Computing Systems. IEEE Access. 2020 Journal
    [ Journal, BibTex ]
    @article{headnet:ieee20,
      author = {Yoshitomo Matsubara and Davide Callegaro and Sabur Baidya and Marco Levorato and Sameer Singh},
      title = { {Head Network Distillation: Splitting Distilled Deep Neural Networks for Resource-Constrained Edge Computing Systems} },
      journal = {IEEE Access},
      volume = {8},
      doi = {10.1109/ACCESS.2020.3039714},
      year = {2020}
    }
  • Pouya Pezeshkpour, Zhengli Zhao, Sameer Singh. On the Utility of Active Instance Selection for Few-Shot Learning. NeurIPS Workshop on Human And Model in the Loop Evaluation and Training Strategies (HAMLETS). 2020 Workshop
    [ PDF, OpenReview, BibTex ]
    @inproceedings{activefew:hamlets20,
      author    = {Pouya Pezeshkpour and Zhengli Zhao and Sameer Singh},
      title     = { {On the Utility of Active Instance Selection for Few-Shot Learning} },
      booktitle = {NeurIPS Workshop on Human And Model in the Loop Evaluation and Training Strategies (HAMLETS)},
      year      = {2020}
    }
  • Tamanna Hossain, Robert L. Logan IV, Arjuna Ugarte, Yoshitomo Matsubara, Sean Young, Sameer Singh. COVIDLies: Detecting COVID-19 Misinformation on Social Media. EMNLP NLP Covid19 Workshop. 2020 Workshop
    Best Paper Award
    [ PDF, ACL Anthology, Website (w/ demo), Abstract, BibTex ]
    The ongoing pandemic has heightened the need for developing tools to flag COVID-19-related misinformation on the internet, specifically on social media such as Twitter. However, due to novel language and the rapid change of information, existing misinformation detection datasets are not effective for evaluating systems designed to detect misinformation on this topic. Misinformation detection can be divided into two sub-tasks: (i) retrieval of misconceptions relevant to posts being checked for veracity, and (ii) stance detection to identify whether the posts Agree, Disagree, or express No Stance towards the retrieved misconceptions. To facilitate research on this task, we release COVIDLies (https://ucinlp.github.io/covid19), a dataset of 6761 expert-annotated tweets to evaluate the performance of misinformation detection systems on 86 different pieces of COVID-19 related misinformation. We evaluate existing NLP systems on this dataset, providing initial benchmarks and identifying key challenges for future models to improve upon.
    @inproceedings{covidlies:nlpcovid20,
      author    = {Tamanna Hossain and Robert L. Logan IV and Arjuna Ugarte and Yoshitomo Matsubara and Sean Young and Sameer Singh},
      title     = { {COVIDLies: Detecting COVID-19 Misinformation on Social Media} },
      booktitle = {EMNLP NLP Covid19 Workshop},
      doi       = {10.18653/v1/2020.nlpcovid19-2.11},
      year      = {2020}
    }
  • Bahareh Harandizadeh, Sameer Singh. Tweeki: Linking Named Entities on Twitter to a Knowledge Graph. EMNLP Workshop on Noisy, User-generated Text (W-NUT). 2020 Workshop
    [ PDF, ACL Anthology, Abstract, BibTex ]
    To identify what entities are being talked about in tweets, we need to automatically link named entities that appear in tweets to structured KBs like WikiData. Existing approaches often struggle with such short, noisy texts, or their complex design and reliance on supervision make them brittle, difficult to use and maintain, and lose significance over time. Further, there is a lack of a large, linked corpus of tweets to aid researchers, along with lack of gold dataset to evaluate the accuracy of entity linking. In this paper, we introduce (1) Tweeki, an unsupervised, modular entity linking system for Twitter, (2) TweekiData, a large, automatically-annotated corpus of Tweets linked to entities in WikiData, and (3) TweekiGold, a gold dataset for entity linking evaluation. Through comprehensive analysis, we show that Tweeki is comparable to the performance of recent state-of-the-art entity linkers models, the dataset is of high quality, and a use case of how the dataset can be used to improve downstream tasks in social media analysis (geolocation prediction).
    @inproceedings{tweeki:wnut20,
      author    = {Bahareh Harandizadeh and Sameer Singh},
      title     = { {Tweeki: Linking Named Entities on Twitter to a Knowledge Graph} },
      booktitle = {EMNLP Workshop on Noisy, User-generated Text (W-NUT)},
      doi       = {10.18653/v1/2020.wnut-1.29},
      year      = {2020}
    }
  • Yoshitomo Matsubara, Sameer Singh. Citations Beyond Self Citations: Identifying Authors, Affiliations, and Nationalities in Scientific Papers. Workshop on Mining Scientific Publications (WOSP). 2020 Workshop
    [ PDF, Code, ACL Anthology, Abstract, BibTex ]
    The question of the utility of the blind peer-review system is fundamental to scientific research. Some studies investigate exactly how “blind” the papers are in the double-blind review system by manually or automatically identifying the true authors, mainly suggesting the number of self-citations in the submitted manuscripts as the primary signal for identity. However, related work on the automated approaches are limited by the sizes of their datasets and the restricted experimental setup, thus they lack practical insights into the blind review process. In this work, we train models that identify the authors, their affiliations, and their nationalities through real-world, large-scale experiments on the Microsoft Academic Graph, including the cold start scenario. Our models are accurate; we identify at least one of authors, affiliations, and nationalities of held-out papers with 40.3%, 47.9% and 86.0% accuracy respectively, from the top-10 guesses of our models. However, through insights from the model, we demonstrate that these entities are identifiable with a small number of guesses primarily by using a combination of self-citations, social, and common citations. Moreover, our further analysis on the results leads to interesting findings, such as that prominent affiliations are easily identifiable (e.g. 93.8% of test papers written by Microsoft are identified with top-10 guesses). The experimental results show, against conventional belief, that the self-citations are no more informative than looking at the common citations, thus suggesting that removing self-citations is not sufficient for authors to maintain their anonymity.
    @inproceedings{deblind:wosp20,
      author    = {Yoshitomo Matsubara and Sameer Singh},
      title     = { {Citations Beyond Self Citations: Identifying Authors, Affiliations, and Nationalities in Scientific Papers} },
      booktitle = {Workshop on Mining Scientific Publications (WOSP)},
      year      = {2020}
    }
  • Pouya Pezeshkpour, Zhengli Zhao, Sameer Singh. Data Importance-Based Active Learning for Limited Labels. CVPR Workshop on Visual Learning with Limited Labels (VL3). 2020 Workshop
    [ Video, BibTex ]
    @inproceedings{ibal:vl320,
      author    = {Pouya Pezeshkpour and Zhengli Zhao and Sameer Singh},
      title     = { {Data Importance-Based Active Learning for Limited Labels} },
      booktitle = {CVPR Workshop on Visual Learning with Limited Labels (VL3)},
      year      = {2020}
    }
2019
  • Eric Wallace, Shi Feng, Nikhil Kandpal, Matt Gardner, Sameer Singh. Universal Adversarial Triggers for Attacking and Analyzing NLP. Empirical Methods in Natural Language Processing (EMNLP). 2019 Conference
    [ PDF, arXiv, Blog post, Code, ACL Anthology, Abstract, BibTex ]
    Adversarial examples highlight model vulnerabilities and are useful for evaluation and interpretation. We define universal adversarial triggers: input-agnostic sequences of tokens that trigger a model to produce a specific prediction when concatenated to any input from a dataset. We propose a gradient-guided search over tokens which finds short trigger sequences (e.g., one word for classification and four words for language modeling) that successfully trigger the target prediction. For example, triggers cause SNLI entailment accuracy to drop from 89.94% to 0.55%, 72% of “why” questions in SQuAD to be answered “to kill american people”, and the GPT-2 language model to spew racist output even when conditioned on non-racial contexts. Furthermore, although the triggers are optimized using white-box access to a specific model, they transfer to other models for all tasks we consider. Finally, since triggers are input-agnostic, they provide an analysis of global model behavior. For instance, they confirm that SNLI models exploit dataset biases and help to diagnose heuristics learned by reading comprehension models.
    @inproceedings{trigger:emnlp19,
      author = {Eric Wallace and Shi Feng and Nikhil Kandpal and Matt Gardner and Sameer Singh},
      title = { {Universal Adversarial Triggers for Attacking and Analyzing NLP} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/D19-1221},
      pages = {2153--2162},
      year = {2019}
    }
  • Eric Wallace, Yizhong Wang, Sujian Li, Sameer Singh, Matt Gardner. Do NLP Models Know Numbers? Probing Numeracy in Embeddings. Empirical Methods in Natural Language Processing (EMNLP). 2019 Conference
    [ PDF, arXiv, ACL Anthology, BibTex ]
    @inproceedings{numeracy:emnlp19,
      author = {Eric Wallace and Yizhong Wang and Sujian Li and Sameer Singh and Matt Gardner},
      title = { {Do NLP Models Know Numbers? Probing Numeracy in Embeddings} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/D19-1534},
      pages = {5307--5315},
      year = {2019}
    }
  • Matthew E. Peters, Mark Neumann, Robert L. Logan IV, Roy Schwartz, Vidur Joshi, Sameer Singh, Noah A. Smith. Knowledge Enhanced Contextual Word Representations. Empirical Methods in Natural Language Processing (EMNLP). 2019 Conference
    [ PDF, arXiv, ACL Anthology, BibTex ]
    @inproceedings{knobert:emnlp19,
      author = {Matthew E. Peters and Mark Neumann and Robert L. Logan IV and Roy Schwartz and Vidur Joshi and Sameer Singh and Noah A. Smith},
      title = { {Knowledge Enhanced Contextual Word Representations} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/D19-1005},
      pages = {43--54},
      year = {2019}
    }
  • Robert L. Logan IV, Nelson F. Liu, Matthew E. Peters, Matt Gardner, Sameer Singh. Barack's Wife Hillary: Using Knowledge Graphs for Fact-Aware Language Modeling. Association for Computational Linguistics (ACL). 2019 Conference
    [ PDF, arXiv, Data, Code, ACL Anthology, Abstract, BibTex ]
    Modeling human language requires the ability to not only generate fluent text but also encode factual knowledge. However, traditional language models are only capable of remembering facts seen at training time, and often have difficulty recalling them. To address this, we introduce the knowledge graph language model (KGLM), a neural language model with mechanisms for selecting and copying facts from a knowledge graph that are relevant to the context. These mechanisms enable the model to render information it has never seen before, as well as generate out-of-vocabulary tokens. We also introduce the Linked WikiText-2 dataset, a corpus of annotated text aligned to the Wikidata knowledge graph whose contents (roughly) match the popular WikiText-2 benchmark. In experiments, we demonstrate that the KGLM achieves significantly better performance than a strong baseline language model. We additionally compare different language model’s ability to complete sentences requiring factual knowledge, showing that the KGLM outperforms even very large language models in generating facts.
    @inproceedings{kglm:acl19,
      author = {Robert L. Logan IV and Nelson F. Liu and Matthew E. Peters and Matt Gardner and Sameer Singh},
      title = { {Barack's Wife Hillary: Using Knowledge Graphs for Fact-Aware Language Modeling} },
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/P19-1598},
      pages = {5962--5971},
      year = {2019}
    }
  • Marco Tulio Ribeiro, Carlos Guestrin, Sameer Singh. Are Red Roses Red? Evaluating Consistency of Question-Answering Models. Association for Computational Linguistics (ACL). 2019 Conference
    [ PDF, ACL Anthology, BibTex ]
    @inproceedings{impl:acl19,
      author = {Marco Tulio Ribeiro and Carlos Guestrin and Sameer Singh},
      title = { {Are Red Roses Red? Evaluating Consistency of Question-Answering Models} },
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/P19-1621},
      pages = {6174--6184},
      year = {2019}
    }
  • Sewon Min, Eric Wallace, Sameer Singh, Matt Gardner, Hannaneh Hajishirzi, Luke Zettlemoyer. Compositional Questions Do Not Necessitate Multi-hop Reasoning. Association for Computational Linguistics (ACL). 2019 Conference
    [ PDF, arXiv, ACL Anthology, BibTex ]
    @inproceedings{mhop:acl19,
      author = {Sewon Min and Eric Wallace and Sameer Singh and Matt Gardner and Hannaneh Hajishirzi and Luke Zettlemoyer},
      title = { {Compositional Questions Do Not Necessitate Multi-hop Reasoning} },
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/P19-1416},
      pages = {4249--4257},
      year = {2019}
    }
  • Dheeru Dua, Yizhong Wang, Pradeep Dasigi, Gabriel Stanovsky, Sameer Singh, Matt Gardner. DROP: A Reading Comprehension Benchmark Requiring Discrete Reasoning Over Paragraphs. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2019 Conference
    [ PDF, Website, arXiv, Data, ACL Anthology, Leaderboard, Demo, Abstract, BibTex ]
    Reading comprehension has recently seen rapid progress, with systems matching humans on the most popular datasets for the task. However, a large body of work has highlighted the brittleness of these systems, showing that there is much work left to be done. We introduce a new reading comprehension benchmark, DROP, which requires Discrete Reasoning Over the content of Paragraphs. In this crowdsourced, adversarially-created, 55k-question benchmark, a system must resolve references in a question, perhaps to multiple input positions, and perform discrete operations over them (such as addition, counting, or sorting). These operations require a much more comprehensive understanding of the content of paragraphs, as they remove the paraphrase-and-entity-typing shortcuts available in prior datasets. We apply state-of-the-art methods from both the reading comprehension and semantic parsing literatures on this dataset and show that the best systems only achieve 38.4% F1 on our generalized accuracy metric, while expert human performance is 96%. We additionally present a new model that combines reading comprehension methods with simple numerical reasoning to achieve 51% F1.
    @inproceedings{drop:naacl19,
      author = {Dheeru Dua and Yizhong Wang and Pradeep Dasigi and Gabriel Stanovsky and Sameer Singh and Matt Gardner},
      title = { {DROP: A Reading Comprehension Benchmark Requiring Discrete Reasoning Over Paragraphs} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      doi = {10.18653/v1/N19-1246},
      pages = {2368--2378},
      year = {2019}
    }
  • Pouya Pezeshkpour, Yifan Tian, Sameer Singh. Investigating Robustness and Interpretability of Link Prediction via Adversarial Modifications. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2019 Conference
    [ PDF, Website, arXiv, Code, Video, ACL Anthology, Abstract, BibTex ]
    Representing entities and relations in an embedding space is a well-studied approach for machine learning on relational data. Existing approaches, however, primarily focus on improving accuracy and overlook other aspects such as robustness and interpretability. In this paper, we propose adversarial modifications for link prediction models: identifying the fact to add into or remove from the knowledge graph that changes the prediction for a target fact after the model is retrained. Using these single modifications of the graph, we identify the most influential fact for a predicted link and evaluate the sensitivity of the model to the addition of fake facts. We introduce an efficient approach to estimate the effect of such modifications by approximating the change in the embeddings when the knowledge graph changes. To avoid the combinatorial search over all possible facts, we train a network to decode embeddings to their corresponding graph components, allowing the use of gradient-based optimization to identify the adversarial modification. We use these techniques to evaluate the robustness of link prediction models (by measuring sensitivity to additional facts), study interpretability through the facts most responsible for predictions (by identifying the most influential neighbors), and detect incorrect facts in the knowledge base.
    @inproceedings{criage:naacl19,
      author = {Pouya Pezeshkpour and Yifan Tian and Sameer Singh},
      title = { {Investigating Robustness and Interpretability of Link Prediction via Adversarial Modifications} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      doi = {10.18653/v1/N19-1337},
      pages = {3336--3347},
      year = {2019}
    }
  • Ananya AnanyaNitya ParthasarthiSameer Singh.GenderQuant: Quantifying Mention-Level Genderedness. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2019 Conference
    PDFWebsiteCodeACL Anthology, Abstract, BibTex ]
    Language is gendered if the context surrounding a mention is suggestive of a particular binary gender for that mention. Detecting the different ways in which language is gendered is an important task since gendered language can bias NLP models (such as for coreference resolution). This task is challenging since genderedness is often expressed in subtle ways. Existing approaches need considerable annotation efforts for each language, domain, and author, and often require handcrafted lexicons and features. Additionally, these approaches do not provide a quantifiable measure of how gendered the text is, nor are they applicable at the fine-grained mention level.
    In this paper, we use existing NLP pipelines to automatically annotate gender of mentions in the text. On corpora labeled using this method, we train a supervised classifier to predict the gender of any mention from its context and evaluate it on unseen text. The model confidence for a mention's gender can be used as a proxy to indicate the level of genderedness of the context. We test this gendered language detector on movie summaries, movie reviews, news articles, and fiction novels, achieving an AUC-ROC of up to 0.71, and observe that the model predictions agree with human judgments collected for this task. We also provide examples of detected gendered sentences from aforementioned domains.
    @inproceedings{gender:naacl19,
      author = {Ananya Ananya and Nitya Parthasarthi and Sameer Singh},
      title = { {GenderQuant: Quantifying Mention-Level Genderedness} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      doi = {10.18653/v1/N19-1303},
      pages = {2959--2969},
      year = {2019}
    }
  • Jun Seok KangRobert L. Logan IVZewei ChuYang ChenDheeru DuaKevin GimpelSameer SinghNiranjan Balasubramanian.PoMo: Generating Entity-Specific Post-Modifiers in Context. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2019 Conference
    PDFWebsitearXivDataACL Anthology, Abstract, BibTex ]
    We introduce entity post-modifier generation as an instance of a collaborative writing task. Given a sentence about a target entity, the task is to automatically generate a post-modifier phrase that provides contextually relevant information about the entity. For example, for the sentence, "Barack Obama, _______, supported the #MeToo movement.", the phrase "a father of two girls" is a contextually relevant post-modifier. To this end, we build PoMo, a post-modifier dataset created automatically from news articles reflecting a journalistic need for incorporating entity information that is relevant to a particular news event. PoMo consists of more than 231K sentences with post-modifiers and associated facts extracted from Wikidata for around 57K unique entities. We use crowdsourcing to show that modeling contextual relevance is necessary for accurate post-modifier generation.
    We adapt a number of existing generation approaches as baselines for this dataset. Our results show there is large room for improvement in terms of both identifying relevant facts to include (knowing which claims are relevant gives a >20% improvement in BLEU score), and generating appropriate post-modifier text for the context (providing relevant claims is not sufficient for accurate generation). We conduct an error analysis that suggests promising directions for future research.
    @inproceedings{pomo:naacl19,
      author = {Jun Seok Kang and Robert L. Logan IV and Zewei Chu and Yang Chen and Dheeru Dua and Kevin Gimpel and Sameer Singh and Niranjan Balasubramanian},
      title = { {PoMo: Generating Entity-Specific Post-Modifiers in Context} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      doi = {10.18653/v1/N19-1089},
      pages = {826--838},
      year = {2019}
    }
  • Eric Wallace, Jens Tuyls, Junlin Wang, Sanjay Subramanian, Matt Gardner, Sameer Singh. AllenNLP Interpret: A Framework for Explaining Predictions of NLP Models. Demo at the Empirical Methods in Natural Language Processing (EMNLP). 2019 Demo
    Best Demonstration Paper Award.
    [ PDF, Project Page, ACL Anthology, ArXiv, Poster, Abstract, BibTex ]
    Neural NLP models are increasingly accurate but are imperfect and opaque---they break in counterintuitive ways and leave end users puzzled at their behavior. Model interpretation methods ameliorate this opacity by providing explanations for specific model predictions. Unfortunately, existing interpretation codebases make it difficult to apply these methods to new models and tasks, which hinders adoption for practitioners and burdens interpretability researchers. We introduce AllenNLP Interpret, a flexible framework for interpreting NLP models. The toolkit provides interpretation primitives (e.g., input gradients) for any AllenNLP model and task, a suite of built-in interpretation methods, and a library of front-end visualization components. We demonstrate the toolkit's flexibility and utility by implementing live demos for five interpretation methods (e.g., saliency maps and adversarial attacks) on a variety of models and tasks (e.g., masked language modeling using BERT and reading comprehension using BiDAF). These demos, alongside our code and tutorials, are available at https://allennlp.org/interpret.
    @inproceedings{interpret:emnlp19,
      author = {Eric Wallace and Jens Tuyls and Junlin Wang and Sanjay Subramanian and Matt Gardner and Sameer Singh},
      title = { {AllenNLP Interpret: A Framework for Explaining Predictions of NLP Models} },
      booktitle = {Demo at the Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/D19-3002},
      pages = {7--12},
      year = {2019}
    }
  • Jihyun ParkDimitrios KotziasPatty KuoRobert L. Logan IVKritzia MercedSameer SinghMichael TananaEfi Karra-TaniskidouJennifer Elston LafataDavid C. AtkinsMing Tai-SealeZac E ImelPadhraic Smyth.Detecting Conversation Topics in Primary Care Office Visits from Transcripts of Patient-Provider Interactions. Journal of the American Medical Informatics Association. 2019 Journal
    PDFWebsite, BibTex ]
    @article{convtopics:jamia19,
      author = {Jihyun Park and Dimitrios Kotzias and Patty Kuo and Robert L. Logan IV and Kritzia Merced and Sameer Singh and Michael Tanana and Efi Karra-Taniskidou and Jennifer Elston Lafata and David C. Atkins and Ming Tai-Seale and Zac E Imel and Padhraic Smyth},
      title = { {Detecting Conversation Topics in Primary Care Office Visits from Transcripts of Patient-Provider Interactions} },
      journal = {Journal of the American Medical Informatics Association},
      volume = {26},
      number = {12},
      doi = {10.1093/jamia/ocz140},
      pages = {1493--1504},
      year = {2019}
    }
  • Sameer Singh.Comment on Semantic Based Adversarial Examples Fool Face Recognition. Synced Review. 2019 Online
    Article, BibTex ]
    @misc{review:synced19,
      author = {Sameer Singh},
      title = { {Comment on Semantic Based Adversarial Examples Fool Face Recognition} },
      editor = {{Synced Review}},
      month = aug,
      url = {https://syncedreview.com/2019/08/09/semantic-based-adversarial-examples-fool-face-recognition/},
      year = {2019}
    }
  • Yoshitomo MatsubaraSabur BaidyaDavide CallegaroMarco LevoratoSameer Singh.Distilled Split Deep Neural Networks for Edge-Assisted Real-Time Systems. Mobicom Workshop on Hot Topics in Video Analytics and Intelligent Edges. 2019 Workshop
    PDF, BibTex ]
    @inproceedings{distill:hottopics19,
      author    = {Yoshitomo Matsubara and Sabur Baidya and Davide Callegaro and Marco Levorato and Sameer Singh},
      title     = { {Distilled Split Deep Neural Networks for Edge-Assisted Real-Time Systems} },
      booktitle = {Mobicom Workshop on Hot Topics in Video Analytics and Intelligent Edges},
      year      = {2019}
    }
  • Anthony ChenGabriel StanovskySameer SinghMatt Gardner.Evaluating Question Answering Evaluation. Workshop on Machine Reading and Question Answering (MRQA). 2019 Workshop
    Best Paper Award.
    PDF, BibTex ]
    @inproceedings{evalqa:mrqa19,
      author    = {Anthony Chen and Gabriel Stanovsky and Sameer Singh and Matt Gardner},
      title     = { {Evaluating Question Answering Evaluation} },
      booktitle = {Workshop on Machine Reading and Question Answering (MRQA)},
      year      = {2019}
    }
  • Dheeru DuaAnanth GottumukkalaAlon TalmorSameer SinghMatt Gardner.ORB: An Open Reading Benchmark for Comprehensive Evaluation of Machine Reading Comprehension. Workshop on Machine Reading and Question Answering (MRQA). 2019 Workshop
    PDF, BibTex ]
    @inproceedings{orb:mrqa19,
      author    = {Dheeru Dua and Ananth Gottumukkala and Alon Talmor and Sameer Singh and Matt Gardner},
      title     = { {ORB: An Open Reading Benchmark for Comprehensive Evaluation of Machine Reading Comprehension} },
      booktitle = {Workshop on Machine Reading and Question Answering (MRQA)},
      year      = {2019}
    }
  • Sanjay SubramanianSameer SinghMatt Gardner.Analyzing Compositionality of Visual Question Answering. NeurIPS Workshop on Visually Grounded Interaction and Language (ViGIL). 2019 Workshop
    PDF, BibTex ]
    @inproceedings{compvqa:vigil19,
      author    = {Sanjay Subramanian and Sameer Singh and Matt Gardner},
      title     = { {Analyzing Compositionality of Visual Question Answering} },
      booktitle = {NeurIPS Workshop on Visually Grounded Interaction and Language (ViGIL)},
      year      = {2019}
    }
  • Zhengli ZhaoNicolas PapernotSameer SinghNeoklis PolyzotisAugustus Odena.Improving Differentially Private Models with Active Learning. NeurIPS Workshop on Privacy in Machine Learning (PriML). 2019 Workshop
    PDFarXiv, BibTex ]
    @inproceedings{dpal:priml19,
      author    = {Zhengli Zhao and Nicolas Papernot and Sameer Singh and Neoklis Polyzotis and Augustus Odena},
      title     = { {Improving Differentially Private Models with Active Learning} },
      booktitle = {NeurIPS Workshop on Privacy in Machine Learning (PriML)},
      year      = {2019}
    }
2018
  • Forest AgostinelliGuillaume HocquetSameer SinghPierre Baldi.From Reinforcement Learning to Deep Reinforcement Learning: An Overview. Braverman Readings in Machine Learning: Key Ideas from Inception to Current State, Springer Press. 2018 Chapter
    PDF (Springer)SpringerAmazonGoogle Books, BibTex ]
    @incollection{deeprl:chap18,
      author = {Forest Agostinelli and Guillaume Hocquet and Sameer Singh and Pierre Baldi},
      title = { {From Reinforcement Learning to Deep Reinforcement Learning: An Overview} },
      booktitle = {Braverman Readings in Machine Learning: Key Ideas from Inception to Current State, Springer Press},
      publisher = {Springer},
      pages = {298--328},
      year = {2018}
    }
  • Pouya PezeshkpourLiyan ChenSameer Singh.Embedding Multimodal Relational Data for Knowledge Base Completion. Empirical Methods in Natural Language Processing (EMNLP). 2018 Conference
    PDFCode/DataarXivACL AnthologyVideo, Abstract, BibTex ]
    Representing entities and relations in an embedding space is a well-studied approach for machine learning on relational data. Existing approaches, however, primarily focus on simple link structure between a finite set of entities, ignoring the variety of data types that are often used in knowledge bases, such as text, images, and numerical values. In this paper, we propose multimodal knowledge base embeddings (MKBE) that use different neural encoders for this variety of observed data, and combine them with existing relational models to learn embeddings of the entities and multimodal data. Further, using these learned embedings and different neural decoders, we introduce a novel multimodal imputation model to generate missing multimodal values, like text and images, from information in the knowledge base. We enrich existing relational datasets to create two novel benchmarks that contain additional information such as textual descriptions and images of the original entities. We demonstrate that our models utilize this additional information effectively to provide more accurate link prediction, achieving state-of-the-art results with a considerable gap of 5-7% over existing methods. Further, we evaluate the quality of our generated multimodal values via a user study.
    @inproceedings{mmkb:emnlp18,
      author = {Pouya Pezeshkpour and Liyan Chen and Sameer Singh},
      title = { {Embedding Multimodal Relational Data for Knowledge Base Completion} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/D18-1359},
      pages = {3208--3218},
      year = {2018}
    }
  • Marzieh SaeidiMax BartoloPatrick LewisSameer SinghTim RocktaschelMike SheldonGuillaume BouchardSebastian Riedel.Interpretation of Natural Language Rules in Conversational Machine Reading. Empirical Methods in Natural Language Processing (EMNLP). 2018 Conference
    PDFarXivACL Anthology, Abstract, BibTex ]
    Most work in machine reading focuses on question answering problems where the answer is directly expressed in the text to read. However, many real-world question answering problems require the reading of text not because it contains the literal answer, but because it contains a recipe to derive an answer together with the reader's background knowledge. One example is the task of interpreting regulations to answer "Can I...?" or "Do I have to...?" questions such as "I am working in Canada. Do I have to carry on paying UK National Insurance?" after reading a UK government website about this topic. This task requires both the interpretation of rules and the application of background knowledge. It is further complicated due to the fact that, in practice, most questions are underspecified, and a human assistant will regularly have to ask clarification questions such as "How long have you been working abroad?" when the answer cannot be directly derived from the question and text. In this paper, we formalise this task and develop a crowd-sourcing strategy to collect 32k task instances based on real-world rules and crowd-generated questions and scenarios. We analyse the challenges of this task and assess its difficulty by evaluating the performance of rule-based and machine-learning baselines. We observe promising results when no background knowledge is necessary, and substantial room for improvement whenever background knowledge is needed.
    @inproceedings{quarc:emnlp18,
      author = {Marzieh Saeidi and Max Bartolo and Patrick Lewis and Sameer Singh and Tim Rocktäschel and Mike Sheldon and Guillaume Bouchard and Sebastian Riedel},
      title = { {Interpretation of Natural Language Rules in Conversational Machine Reading} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      doi = {10.18653/v1/D18-1233},
      pages = {2087--2097},
      year = {2018}
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin.Semantically Equivalent Adversarial Rules for Debugging NLP models. Association for Computational Linguistics (ACL). 2018 Conference
    Honorable Mention for Best Paper.
    PDFAppendixCodeACL AnthologyVideoSlides, Abstract, BibTex ]
    Complex machine learning models for NLP are often brittle, making different predictions for input instances that are extremely similar semantically. To automatically detect this behavior for individual instances, we present semantically equivalent adversaries (SEAs) - semantic-preserving perturbations that induce changes in the model’s predictions. We generalize these adversaries into semantically equivalent adversarial rules (SEARs) - simple, universal replacement rules that induce adversaries on many instances. We demonstrate the usefulness and flexibility of SEAs and SEARs by detecting bugs in black-box state-of-the-art models for three domains: machine comprehension, visual question-answering, and sentiment analysis. Via user studies, we demonstrate that we generate high-quality local adversaries for more instances than humans, and that SEARs induce four times as many mistakes as the bugs discovered by human experts. SEARs are also actionable: retraining models using data augmentation significantly reduces bugs, while maintaining accuracy.
    @inproceedings{sears:acl18,
      author = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title = { {Semantically Equivalent Adversarial Rules for Debugging NLP models} },
      booktitle = {Association for Computational Linguistics (ACL)},
      doi = {10.18653/v1/P18-1079},
      pages = {856--865},
      year = {2018}
    }
  • Zhengli ZhaoDheeru DuaSameer Singh.Generating Natural Adversarial Examples. International Conference on Learning Representations (ICLR). 2018 Conference
    PDFSource CodearXivOpenReview, Abstract, BibTex ]
    Due to their complex nature, it is hard to characterize the ways in which machine learning models can misbehave or be exploited when deployed. Recent work on adversarial examples, i.e. inputs with minor perturbations that result in substantially different model predictions, is helpful in evaluating the robustness of these models by exposing the adversarial scenarios where they fail. However, these malicious perturbations are often unnatural, not semantically meaningful, and not applicable to complicated domains such as language. In this paper, we propose a framework to generate natural and legible adversarial examples that lie on the data manifold, by searching in semantic space of dense and continuous data representation, utilizing the recent advances in generative adversarial networks. We present generated adversaries to demonstrate the potential of the proposed approach for black-box classifiers for a wide range of applications such as image classification, textual entailment, and machine translation. We include experiments to show that the generated adversaries are natural, legible to humans, and useful in evaluating and analyzing black-box classifiers.
    @inproceedings{natadv:iclr18,
      author    = {Zhengli Zhao and Dheeru Dua and Sameer Singh},
      title     = { {Generating Natural Adversarial Examples} },
      booktitle = {International Conference on Learning Representations (ICLR)},
      year      = {2018}
    }
  • Forough ArabshahiSameer SinghAnimashree Anandkumar.Combining Symbolic Expressions and Black-box Function Evaluations for Training Neural Programs. International Conference on Learning Representations (ICLR). 2018 Conference
    PDFSource CodearXivOpenReview, Abstract, BibTex ]
    Neural programming involves training neural networks to learn programs, mathematics, or logic from data. Previous works have failed to achieve good generalization performance, especially on problems and programs with high complexity or on large domains. This is because they mostly rely either on black-box function evaluations that do not capture the structure of the program, or on detailed execution traces that are expensive to obtain, and hence the training data has poor coverage of the domain under consideration. We present a novel framework that utilizes black-box function evaluations, in conjunction with symbolic expressions that define relationships between the given functions. We employ tree LSTMs to incorporate the structure of the symbolic expression trees. We use tree encoding for numbers present in function evaluation data, based on their decimal representation. We present an evaluation benchmark for this task to demonstrate our proposed model combines symbolic reasoning and function evaluation in a fruitful manner, obtaining high accuracies in our experiments. Our framework generalizes significantly better to expressions of higher depth and is able to fill partial equations with valid completions.
    @inproceedings{funeval:iclr18,
      author    = {Forough Arabshahi and Sameer Singh and Animashree Anandkumar},
      title     = { {Combining Symbolic Expressions and Black-box Function Evaluations for Training Neural Programs} },
      booktitle = {International Conference on Learning Representations (ICLR)},
      year      = {2018}
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin.Anchors: High-Precision Model-Agnostic Explanations. AAAI Conference on Artificial Intelligence (AAAI). 2018 Conference
    PDFCode (package)Code (results)AAAI Page, Abstract, BibTex ]
    We introduce a novel model-agnostic system that explains the behavior of complex models with high-precision rules called anchors, representing local, “sufficient” conditions for predictions. We propose an algorithm to efficiently compute these explanations for any black-box model with high-probability guarantees. We demonstrate the flexibility of anchors by explaining a myriad of different models for different domains and tasks. In a user study, we show that anchors enable users to predict how a model would behave on unseen instances with less effort and higher precision, as compared to existing linear explanations or no explanations.
    @inproceedings{anchors:aaai18,
      author = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title = { {Anchors: High-Precision Model-Agnostic Explanations} },
      booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
      pages = {1527--1535},
      year = {2018}
    }
  • Yanbing BaiChang GaoSameer SinghMagaly KochBruno AdrianoErick MasShunichi Koshimura.A Framework of Rapid Regional Tsunami Damage Recognition from Post-event TerraSAR-X Imagery Using Deep Neural Networks. IEEE Geoscience and Remote Sensing Letters. 2018 Journal
    PDFIEEE, Abstract, BibTex ]
    Near real-time building damage mapping is an indispensable prerequisite for governments to make decisions for disaster relief. With high-resolution synthetic aperture radar (SAR) systems, such as TerraSAR-X, the provision of such products in a fast and effective way becomes possible. In this letter, a deep learning-based framework for rapid regional tsunami damage recognition using post-event SAR imagery is proposed. To perform such a rapid damage mapping, a series of tile-based image split analysis is employed to generate the data set. Next, a selection algorithm with the SqueezeNet network is developed to swiftly distinguish between built-up (BU) and nonbuilt-up regions. Finally, a recognition algorithm with a modified wide residual network is developed to classify the BU regions into wash away, collapsed, and slightly damaged regions. Experiments performed on the TerraSAR-X data from the 2011 Tohoku earthquake and tsunami in Japan show a BU region extraction accuracy of 80.4% and a damage-level recognition accuracy of 74.8%, respectively. Our framework takes around 2 h to train on a new region, and only several minutes for prediction.
    @article{tsunami:geosense18,
      author = {Yanbing Bai and Chang Gao and Sameer Singh and Magaly Koch and Bruno Adriano and Erick Mas and Shunichi Koshimura},
      title = { {A Framework of Rapid Regional Tsunami Damage Recognition from Post-event TerraSAR-X Imagery Using Deep Neural Networks} },
      journal = {IEEE Geoscience and Remote Sensing Letters},
      volume = {15},
      number = {1},
      doi = {10.1109/LGRS.2017.2772349},
      pages = {43--47},
      year = {2018}
    }
  • Forough ArabshahiSameer SinghAnimashree Anandkumar.Towards Solving Differential Equations through Neural Programming. ICML Workshop on Neural Abstract Machines and Program Induction (NAMPI). 2018 Workshop
    PDFPoster, BibTex ]
    @inproceedings{diffeqeval:nampi18,
      author    = {Forough Arabshahi and Sameer Singh and Animashree Anandkumar},
      title     = { {Towards Solving Differential Equations through Neural Programming} },
      booktitle = {ICML Workshop on Neural Abstract Machines and Program Induction (NAMPI)},
      year      = {2018}
    }
2017
  • Nitish GuptaSameer SinghDan Roth.Entity Linking via Joint Encoding of Types, Descriptions, and Context. Empirical Methods in Natural Language Processing (EMNLP). 2017 Conference
    PDFCodeACL AnthologyWebsite, Abstract, BibTex ]
    For accurate entity linking, we need to capture various information aspects of an entity, such as its description in a KB, contexts in which it is mentioned, and structured knowledge. Additionally, a linking system should work on texts from different domains without requiring domain-specific training data or hand-engineered features.
    In this work we present a neural, modular entity linking system that learns a unified dense representation for each entity using multiple sources of information, such as its description, contexts around its mentions, and its fine-grained types. We show that the resulting entity linking system is effective at combining these sources, and performs competitively, sometimes out-performing current state-of-the-art systems across datasets, without requiring any domain-specific training data or hand-engineered features. We also show that our model can effectively "embed" entities that are new to the KB, and is able to link its mentions accurately.
    @inproceedings{neuralel:emnlp17,
      author = {Nitish Gupta and Sameer Singh and Dan Roth},
      title = { {Entity Linking via Joint Encoding of Types, Descriptions, and Context} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      month = sep,
      doi = {10.18653/v1/D17-1284},
      pages = {2681--2690},
      year = {2017}
    }
  • Igor BuragoDavide CallegaroMarco LevoratoSameer Singh.Intelligent Data Filtering in Constrained IoT Systems. Asilomar Conference on Signals, Systems, and Computers. 2017 Invited
    PDFIEEE Xplore, Abstract, BibTex ]
    The expansion of complex autonomous sensing and control mechanisms in the Internet-of-Things systems clashes with constraints on computation and wireless communication resources. In this paper, we propose a framework to address this conflict for applications in which resolution using a centralized architecture with a general-purpose compression of observations is not appropriate. Three approaches for distributing observation detection workload between sensing and processing devices are considered for sensor systems within wireless islands. Each of the approaches is formulated for the shared configuration of a sensor-edge system, in which the network structure, observation monitoring problem, and machine learning-based detector implementing it are not modified. For every approach, a high-level strategy for realization of the detector for different assumptions on the relation between its complexity and the system's constraints is considered. In each case, the potential for the constraints' satisfaction is shown to exist and be exploitable via division, approximation, and delegation of the detector's workload to the sensing devices off the edge processor. We present examples of applications that benefit from the proposed approaches.
    @inproceedings{semcompress:asilomar17,
      author    = {Igor Burago and Davide Callegaro and Marco Levorato and Sameer Singh},
      title     = { {Intelligent Data Filtering in Constrained IoT Systems} },
      booktitle = {Asilomar Conference on Signals, Systems, and Computers},
      year      = {2017}
    }
  • Igor BuragoMarco LevoratoSameer Singh.Semantic Compression for Edge-Assisted Systems. Information Theory and Applications (ITA) Workshop. 2017 Invited
    PDFArXiv version, BibTex ]
    @inproceedings{semcompress:ita17,
      author = {Igor Burago and Marco Levorato and Sameer Singh},
      title = { {Semantic Compression for Edge-Assisted Systems} },
      booktitle = {Information Theory and Applications (ITA) Workshop},
      month = feb,
      year = {2017}
    }
  • Zhengli ZhaoDheeru DuaSameer Singh.Generating Natural Adversarial Examples. NeurIPS Workshop on Machine Deception. 2017 Workshop
    Amazon Best Poster Award at the Southern California Machine Learning Symposium.
    Shorter version of the paper at ICLR 2018.
    PDFArXiv (full paper), Abstract, BibTex ]
    Due to their complex nature, it is hard to characterize the ways in which machine learning models can misbehave or be exploited when deployed. Recent work on adversarial examples, i.e. inputs with minor perturbations that result in substantially different model predictions, is helpful in evaluating the robustness of these models by exposing the adversarial scenarios where they fail. However, these malicious perturbations are often unnatural, not semantically meaningful, and not applicable to complicated domains such as language. In this paper, we propose a framework to generate natural and legible adversarial examples by searching in semantic space of dense and continuous data representation, utilizing the recent advances in generative adversarial networks. We present generated adversaries to demonstrate the potential of the proposed approach for black-box classifiers in a wide range of applications such as image classification, textual entailment, and machine translation. We include experiments to show that the generated adversaries are natural, legible to humans, and useful in evaluating and analyzing black-box classifiers.
    @inproceedings{natadv:mldecept17,
      author    = {Zhengli Zhao and Dheeru Dua and Sameer Singh},
      title     = { {Generating Natural Adversarial Examples} },
      booktitle = {NeurIPS Workshop on Machine Deception},
      year      = {2017}
    }
  • Ananya AnanyaSameer Singh.How Biased Are We? Automated Detection of Gendered Language. ACL Workshop on Women and Underrepresented Minorities in NLP (WiNLP). 2017 Workshop
    Also presented at the NeurIPS 2017 Workshop for Women in Machine Learning (WiML).
    PDF, BibTex ]
    @inproceedings{gender:winlp17,
      author    = {Ananya Ananya and Sameer Singh},
      title     = { {How Biased Are We? Automated Detection of Gendered Language} },
      booktitle = {ACL Workshop on Women and Underrepresented Minorities in NLP (WiNLP)},
      month     = aug,
      year      = {2017},
    }
  • Pouya PezeshkpourCarlos GuestrinSameer Singh.Compact Factorization of Matrices Using Generalized Round-Rank. Southern California Machine Learning Symposium. 2017 Workshop
    PDF, BibTex ]
    @inproceedings{grank:southcal17,
      author    = {Pouya Pezeshkpour and Carlos Guestrin and Sameer Singh},
      title     = { {Compact Factorization of Matrices Using Generalized Round-Rank} },
      booktitle = {Southern California Machine Learning Symposium},
      year      = {2017},
    }
  • Pouya PezeshkpourLiyan ChenSameer Singh.Embedding Multimodal Relational Data. Workshop on Automated Knowledge Base Construction (AKBC). 2017 Workshop
    PDF, BibTex ]
    @inproceedings{mmkbe:akbc17,
      author    = {Pouya Pezeshkpour and Liyan Chen and Sameer Singh},
      title     = { {Embedding Multimodal Relational Data} },
      booktitle = {Workshop on Automated Knowledge Base Construction (AKBC)},
      year      = {2017},
    }
  • Robert L. Logan IVSamuel HumeauSameer Singh.Multimodal Attribute Extraction. Workshop on Automated Knowledge Base Construction (AKBC). 2017 Workshop
    PDF, BibTex ]
    @inproceedings{maed:akbc17,
      author    = {Robert L. Logan IV and Samuel Humeau and Sameer Singh},
      title     = { {Multimodal Attribute Extraction} },
      booktitle = {Workshop on Automated Knowledge Base Construction (AKBC)},
      year      = {2017},
    }
  • Parisa KordjamshidiSameer SinghDaniel KhashabiChristos ChristodoulopoulosMark SammonsSaurabh SinhaDan Roth.Relational Learning and Feature Extraction by Querying over Heterogeneous Information Networks. International Workshop on Statistical Relational AI (StarAI). 2017 Workshop
    PDFArXiv version, Abstract, BibTex ]
    Many real world systems need to operate on heterogeneous information networks that consist of numerous interacting components of different types. Examples include systems that perform data analysis on biological information networks; social networks; and information extraction systems processing unstructured data to convert raw text to knowledge graphs. Many previous works describe specialized approaches to perform specific types of analysis, mining and learning on such networks. In this work, we propose a unified framework consisting of a data model — a graph with a first order schema — along with a declarative language for constructing, querying and manipulating such networks in ways that facilitate relational and structured machine learning. In particular, we provide an initial prototype for a relational and graph traversal query language where queries are directly used as relational features for structured machine learning models. Feature extraction is performed by making declarative graph traversal queries. Learning and inference models can directly operate on this relational representation and augment it with new data and knowledge that, in turn, is integrated seamlessly into the relational structure to support new predictions. We demonstrate this system's capabilities by showcasing tasks in natural language processing and computational biology domains.
    @inproceedings{saul:starai17,
      author    = {Parisa Kordjamshidi and Sameer Singh and Daniel Khashabi and Christos Christodoulopoulos and Mark Sammons and Saurabh Sinha and Dan Roth},
      title     = { {Relational Learning and Feature Extraction by Querying over Heterogeneous Information Networks} },
      booktitle = {International Workshop on Statistical Relational AI (StarAI)},
      month     = jul,
      year      = {2017},
    }
2016
  • Parisa KordjamshidiDaniel KhashabiChristos ChristodoulopoulosBhargav MangipudiSameer SinghDan Roth.Better call Saul: Flexible Programming for Learning and Inference in NLP. International Conference on Computational Linguistics (COLING). 2016 Conference
    PDFACL Anthology, BibTex ]
    @inproceedings{saul:coling16,
      author    = {Parisa Kordjamshidi and Daniel Khashabi and Christos Christodoulopoulos and Bhargav Mangipudi and Sameer Singh and Dan Roth},
      title     = { {Better call Saul: Flexible Programming for Learning and Inference in NLP} },
      booktitle = {International Conference on Computational Linguistics (COLING)},
      month     = dec,
      pages     = {3030--3040},
      year      = {2016},
    }
  • Hannah RashkinSameer SinghYejin Choi.Connotation Frames: A Data-Driven Investigation. Association for Computational Linguistics (ACL). 2016 Conference
    PDFarXivWebsiteACL Anthology, BibTex ]
    @inproceedings{connot:acl16,
      author    = {Hannah Rashkin and Sameer Singh and Yejin Choi},
      title     = { {Connotation Frames: A Data-Driven Investigation} },
      booktitle = {Association for Computational Linguistics (ACL)},
      month     = aug,
      doi       = {10.18653/v1/P16-1030},
      pages     = {311--321},
      year      = {2016},
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin."Why Should I Trust You?": Explaining the Predictions of Any Classifier. Knowledge Discovery and Data Mining (KDD). 2016 Conference
    Audience Appreciation Award
    Also presented at the CHI 2016 Workshop on Human-Centred Machine Learning (HCML).
    PDFarXivCodeVideoO'ReillyCode (experiments)ACM Page, BibTex ]
    @inproceedings{lime:kdd16,
      author    = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title     = { {"Why Should I Trust You?": Explaining the Predictions of Any Classifier} },
      booktitle = {Knowledge Discovery and Data Mining (KDD)},
      month     = aug,
      doi       = {10.1145/2939672.2939778},
      pages     = {1135--1144},
      year      = {2016},
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin."Why Should I Trust You?": Explaining the Predictions of Any Classifier. Demo at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2016 Demo
    Demonstration of the KDD 2016 paper.
    PDFCode, BibTex ]
    @inproceedings{lime:naacl16,
      author    = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title     = { {"Why Should I Trust You?": Explaining the Predictions of Any Classifier} },
      booktitle = {Demo at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      month     = jun,
      year      = {2016},
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin.Introduction to Local Interpretable Model-Agnostic Explanations (LIME). O'Reilly Media. 2016 Online
    Article, BibTex ]
    @misc{lime:oreilly16,
      author       = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title        = { {Introduction to Local Interpretable Model-Agnostic Explanations (LIME)} },
      howpublished = {O'Reilly Media},
      month        = aug,
      url          = {https://www.oreilly.com/learning/introduction-to-local-interpretable-model-agnostic-explanations-lime},
      year         = {2016},
    }
  • Sameer SinghMarco Tulio RibeiroCarlos Guestrin.Programs as Black-Box Explanations. NeurIPS Workshop on Interpretable Machine Learning in Complex Systems. 2016 Workshop
    PDFarXiv, Abstract, BibTex ]
    Recent work in model-agnostic explanations of black-box machine learning has demonstrated that interpretability of complex models does not have to come at the cost of accuracy or model flexibility. However, it is not clear what kind of explanations, such as linear models, decision trees, and rule lists, are the appropriate family to consider, and different tasks and models may benefit from different kinds of explanations. Instead of picking a single family of representations, in this work we propose to use "programs" as model-agnostic explanations. We show that small programs can be expressive yet intuitive as explanations, and generalize over a number of existing interpretable families. We propose a prototype program induction method based on simulated annealing that approximates the local behavior of black-box classifiers around a specific prediction using random perturbations. Finally, we present preliminary application on small datasets and show that the generated explanations are intuitive and accurate for a number of classifiers.
    @inproceedings{prog:nipsws16,
      author    = {Sameer Singh and Marco Tulio Ribeiro and Carlos Guestrin},
      title     = { {Programs as Black-Box Explanations} },
      booktitle = {NeurIPS Workshop on Interpretable Machine Learning in Complex Systems},
      month     = nov,
      year      = {2016},
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin.Nothing Else Matters: Model-Agnostic Explanations By Identifying Prediction Invariance. NeurIPS Workshop on Interpretable Machine Learning in Complex Systems. 2016 Workshop
    PDFarXiv, Abstract, BibTex ]
    At the core of interpretable machine learning is the question of whether humans are able to make accurate predictions about a model's behavior. Assumed in this question are three properties of the interpretable output: coverage, precision, and effort. Coverage refers to how often humans think they can predict the model's behavior, precision to how accurate humans are in those predictions, and effort is either the up-front effort required in interpreting the model, or the effort required to make predictions about a model's behavior.
    In this work, we propose anchor-LIME (aLIME), a model-agnostic technique that produces high-precision rule-based explanations for which the coverage boundaries are very clear. We compare aLIME to linear LIME with simulated experiments, and demonstrate the flexibility of aLIME with qualitative examples from a variety of domains and tasks.
    @inproceedings{anchor:nipsws16,
      author    = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title     = { {Nothing Else Matters: Model-Agnostic Explanations By Identifying Prediction Invariance} },
      booktitle = {NeurIPS Workshop on Interpretable Machine Learning in Complex Systems},
      month     = nov,
      year      = {2016},
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin."Why Should I Trust You?": Explaining the Predictions of Any Classifier. CHI Workshop on Human-Centred Machine Learning (HCML). 2016 Workshop
    Shorter version of the paper presented at KDD 2016.
    PDF, BibTex ]
    @inproceedings{lime:hcml16,
      author    = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title     = { {"Why Should I Trust You?": Explaining the Predictions of Any Classifier} },
      booktitle = {CHI Workshop on Human-Centred Machine Learning (HCML)},
      month     = may,
      year      = {2016},
    }
  • Marco Tulio RibeiroSameer SinghCarlos Guestrin.Model-Agnostic Interpretability of Machine Learning. ICML Workshop on Human Interpretability in Machine Learning (WHI). 2016 Workshop
    Best Paper Award
    PDF, BibTex ]
    @inproceedings{lime:whi16,
      author    = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin},
      title     = { {Model-Agnostic Interpretability of Machine Learning} },
      booktitle = {ICML Workshop on Human Interpretability in Machine Learning (WHI)},
      month     = jun,
      year      = {2016},
    }
  • Sameer SinghSebastian Riedel.Creating Interactive and Visual Educational Resources for AI. AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI). 2016 Workshop
    PDFAAAI Page, Abstract, BibTex ]
    Teaching artificial intelligence is effective if the experience is a visual and interactive one, with educational materials that utilize combinations of various content types such as text, math, and code into an integrated experience. Unfortunately, easy-to-use tools for creating such pedagogical resources are not available to the educators, resulting in most courses being taught using a disconnected set of static materials, which is not only ineffective for learning AI, but further, requires repeated and redundant effort for the instructor. In this paper, we introduce Moro, a software tool for easily creating and presenting AI-friendly teaching materials. Moro notebooks integrate content of different types (text, math, code, images), allow real-time interactions via modifiable and executable code blocks, and are viewable in browsers both as long-form pages and as presentations. Creating notebooks is easy and intuitive; the creation tool is also in-browser, is WYSIWYG for quick iterations of editing, and supports a variety of shortcuts and customizations for efficiency. We present three deployed case studies of Moro that widely differ from each other, demonstrating its utility in a variety of scenarios such as in-class teaching and conference tutorials.
    @inproceedings{moro:eaai16,
      author    = {Sameer Singh and Sebastian Riedel},
      title     = { {Creating Interactive and Visual Educational Resources for {AI}} },
      booktitle = {AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI)},
      year      = {2016},
    }
2015
  • Tim RocktaschelSameer SinghSebastian Riedel.Injecting Logical Background Knowledge into Embeddings for Relation Extraction. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2015 Conference
    PDFCodeTalk videoACL Anthology, Abstract, BibTex ]
    Matrix factorization approaches to relation extraction provide several attractive features: they support distant supervision, handle open schemas, and leverage unlabeled data. Unfortunately, these methods share a shortcoming with all other distantly supervised approaches: they cannot learn to extract target relations without existing data in the knowledge base, and likewise, these models are inaccurate for relations with sparse data. Rule-based extractors, on the other hand, can be easily extended to novel relations and improved for existing but inaccurate relations, through first-order formulae that capture auxiliary domain knowledge. However, usually a large set of such formulae is necessary to achieve generalization.
    In this paper, we introduce a paradigm for learning low-dimensional embeddings of entity-pairs and relations that combine the advantages of matrix factorization with first-order logic domain knowledge. We introduce simple approaches for estimating such embeddings, as well as a novel training algorithm to jointly optimize over factual and first-order logic information. Our results show that this method is able to learn accurate extractors with little or no distant supervision alignments, while at the same time generalizing to textual patterns that do not appear in the formulae.
    @inproceedings{logicmf:naacl15,
      author    = {Tim Rocktaschel and Sameer Singh and Sebastian Riedel},
      title     = { {Injecting Logical Background Knowledge into Embeddings for Relation Extraction} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year      = {2015},
    }
  • Tianqi ChenSameer SinghBen TaskarCarlos Guestrin.Efficient Second-Order Gradient Boosting for Conditional Random Fields. International Conference on Artificial Intelligence and Statistics (AISTATS). 2015 Conference
    PDF, BibTex ]
    @inproceedings{gbcrf:aistats15,
      author    = {Tianqi Chen and Sameer Singh and Ben Taskar and Carlos Guestrin},
      title     = { {Efficient Second-Order Gradient Boosting for Conditional Random Fields} },
      booktitle = {International Conference on Artificial Intelligence and Statistics (AISTATS)},
      year      = {2015},
    }
  • Sameer SinghTim RocktaschelLuke HewittJason NaradowskySebastian Riedel.WOLFE: An NLP-friendly Declarative Machine Learning Stack. Demo at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2015 Demo
    PDFWebsiteDemo, Abstract, BibTex ]
    Developing machine learning algorithms for natural language processing (NLP) applications is inherently an iterative process, involving a continuous refinement of the choice of model, engineering of features, selection of inference algorithms, search for the right hyper-parameters, and error analysis. Existing probabilistic program languages (PPLs) only provide partial solutions; most of them do not support commonly used models such as matrix factorization or neural networks, and do not facilitate interactive and iterative programming that is crucial for rapid development of these models.
    In this demo we introduce WOLFE, a stack designed to facilitate the development of NLP applications: (1) the WOLFE language allows the user to concisely define complex models, enabling easy modification and extension, (2) the WOLFE interpreter transforms declarative machine learning code into automatically differentiable terms or, where applicable, into factor graphs that allow for complex models to be applied to real-world applications, and (3) the WOLFE IDE provides a number of different visual and interactive elements, allowing intuitive exploration and editing of the data representations, the underlying graphical models, and the execution of the inference algorithms.
    @inproceedings{wolfe:naacl15,
      author    = {Sameer Singh and Tim Rocktaschel and Luke Hewitt and Jason Naradowsky and Sebastian Riedel},
      title     = { {{WOLFE}: An {NLP}-friendly Declarative Machine Learning Stack} },
      booktitle = {Demo at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year      = {2015},
    }
  • Xiao LingSameer SinghDan Weld.Design Challenges for Entity Linking. Transactions of the Association for Computational Linguistics (TACL). 2015 Journal
    To be presented at ACL, Beijing, July 26-31, 2015.
    PDFTACL PageTACL PDF, Abstract, BibTex ]
    Recent research on entity linking (EL) has introduced a plethora of promising techniques, ranging from deep neural networks to joint inference. But despite numerous papers there is surprisingly little understanding of the state of the art in EL. We attack this confusion by analyzing differences between several versions of the EL problem and presenting a simple yet effective, modular, unsupervised system, called Vinculum, for entity linking. We conduct an extensive evaluation on nine data sets, comparing Vinculum with two state-of-the-art systems, and elucidate key aspects of the system that include mention extraction, candidate generation, entity type prediction, entity coreference, and coherence.
    @article{el:tacl15,
      author  = {Xiao Ling and Sameer Singh and Dan Weld},
      title   = { {Design Challenges for Entity Linking} },
      journal = {Transactions of the Association for Computational Linguistics (TACL)},
      volume  = {3},
      year    = {2015},
    }
  • Nitish GuptaSameer Singh.Collective Factorization for Relational Data: An Evaluation on the Yelp Datasets. Technical Report, Yelp Dataset Challenge, Round 4. 2015 TechReport
    Grand Prize Winner of Yelp Dataset Challenge Round 4
    PDFWebsiteYelp Challenge, BibTex ]
    @techreport{factordb:yelp15,
      author      = {Nitish Gupta and Sameer Singh},
      title       = { {Collective Factorization for Relational Data: An Evaluation on the Yelp Datasets} },
      institution = {Yelp Dataset Challenge, Round 4},
      year        = {2015},
    }
  • Guillaume BouchardSameer SinghTheo Trouillon.On Approximate Reasoning Capabilities of Low-Rank Vector Spaces. AAAI Spring Symposium on Knowledge Representation and Reasoning (KRR): Integrating Symbolic and Neural Approaches. 2015 Workshop
    PDFAAAI PDF, BibTex ]
    @inproceedings{logicmf:krr15,
      author    = {Guillaume Bouchard and Sameer Singh and Theo Trouillon},
      title     = { {On Approximate Reasoning Capabilities of Low-Rank Vector Spaces} },
      booktitle = {AAAI Spring Symposium on Knowledge Representation and Reasoning (KRR): Integrating Symbolic and Neural Approaches},
      year      = {2015},
    }
  • Ivan SanchezTim RocktaschelSebastian RiedelSameer Singh.Towards Extracting Faithful and Descriptive Representations of Latent Variable Models. AAAI Spring Symposium on Knowledge Representation and Reasoning (KRR): Integrating Symbolic and Neural Approaches. 2015 Workshop
    PDFAAAI PDF, BibTex ]
    @inproceedings{explain:krr15,
      author    = {Ivan Sanchez and Tim Rocktaschel and Sebastian Riedel and Sameer Singh},
      title     = { {Towards Extracting Faithful and Descriptive Representations of Latent Variable Models} },
      booktitle = {AAAI Spring Symposium on Knowledge Representation and Reasoning (KRR): Integrating Symbolic and Neural Approaches},
      year      = {2015},
    }
  • Sameer SinghTim RocktaschelSebastian Riedel.Towards Combined Matrix and Tensor Factorization for Universal Schema Relation Extraction. NAACL Workshop on Vector Space Modeling for NLP. 2015 Workshop
    PDF, BibTex ]
    @inproceedings{mftf:vsm15,
      author    = {Sameer Singh and Tim Rocktaschel and Sebastian Riedel},
      title     = { {Towards Combined Matrix and Tensor Factorization for Universal Schema Relation Extraction} },
      booktitle = {NAACL Workshop on Vector Space Modeling for NLP},
      year      = {2015},
    }
2014
  • Ignacio CanoSameer SinghCarlos Guestrin.Distributed Non-Parametric Representations for Vital Filtering: UW at TREC KBA 2014. Text REtrieval Conference (TREC): Knowledge-Base Acceleration (KBA) Track. 2014 Conference
    PDF, BibTex ]
    @inproceedings{uw:kba14,
      author    = {Ignacio Cano and Sameer Singh and Carlos Guestrin},
      title     = { {Distributed Non-Parametric Representations for Vital Filtering: {UW at TREC KBA} 2014} },
      booktitle = {Text REtrieval Conference (TREC): Knowledge-Base Acceleration (KBA) Track},
      year      = {2014},
    }
  • Sameer SinghThore GraepelLucas J. BordeauxAndrew D. Gordon.Relational database management. US Patent Number 0188928. 2014 Patent
    PDFWebpage, BibTex ]
    @techreport{rdb:patent14,
      author      = {Sameer Singh and Thore Graepel and Lucas J. Bordeaux and Andrew D. Gordon},
      title       = { {Relational database management} },
      institution = {US Patent Number 0188928},
      year        = {2014},
    }
  • Sameer Singh.Scaling MCMC Inference and Belief Propagation for Large, Dense Graphical Models. PhD Thesis, University of Massachusetts. 2014 Thesis
    Committee: Andrew McCallum, Carlos Guestrin, Ben Marlin, David Jensen, Michael Zink.
    PDFUMass Page, Abstract, BibTex ]
    With the physical constraints of semiconductor-based electronics becoming increasingly limiting in the past decade, single-core CPUs have given way to multi-core and distributed computing platforms. At the same time, access to large data collections is progressively becoming commonplace due to the lowering cost of storage and bandwidth. Traditional machine learning paradigms that have been designed to operate sequentially on single processor architectures seem destined to become obsolete in this world of multi-core, multi-node systems and massive data sets. Inference for graphical models is one such example for which most existing algorithms are sequential in nature and are difficult to scale using parallel computations. Further, modeling large datasets leads to an escalation in the number of variables, factors, domains, and the density of the models, all of which have a substantial impact on the computational and storage complexity of inference. To achieve scalability, existing techniques impose strict independence assumptions on the model, resulting in tractable inference at the expense of expressiveness, and therefore of accuracy and utility, of the model.
    Motivated by the need to scale inference to large, dense graphical models, in this thesis we explore approximations to Markov chain Monte Carlo (MCMC) and belief propagation (BP) that induce dynamic sparsity in the model to utilize parallelism. In particular, since computations over some factors, variables, and values are more important than over others at different stages of inference, proposed approximations that prioritize and parallelize such computations facilitate efficient inference. First, we show that a synchronously distributed MCMC algorithm that uses dynamic partitioning of the model achieves scalable inference. We then identify bottlenecks in the synchronous architecture, and demonstrate that a collection of MCMC techniques that use asynchronous updates are able to address these drawbacks. For large domains and high-order factors, we find that dynamically inducing sparsity in variable domains, results in scalable belief propagation that enables joint inference. We also show that formulating distributed BP and joint inference as generalized BP on cluster graphs, and by using cluster message approximations, provides significantly lower communication cost and running time. With these tools for inference in hand, we are able to tackle entity tagging, relation extraction, entity resolution, cross-document coreference, joint inference, and other information extraction tasks over large text corpora.
    @phdthesis{thesis,
      author = {Sameer Singh},
      title  = { {Scaling MCMC Inference and Belief Propagation for Large, Dense Graphical Models} },
      school = {University of Massachusetts},
      year   = {2014},
    }
  • Mathias NiepertSameer Singh.Out of Many, One: Unifying Web-Extracted Knowledge Bases. Workshop on Automated Knowledge Base Construction (AKBC). 2014 Workshop
    PDF, BibTex ]
    @inproceedings{kb-integration:akbc14,
      author    = {Mathias Niepert and Sameer Singh},
      title     = { {Out of Many, One: Unifying Web-Extracted Knowledge Bases} },
      booktitle = {Workshop on Automated Knowledge Base Construction (AKBC)},
      year      = {2014},
    }
  • Sameer SinghSebastian RiedelLuke HewittTim Rocktaschel.Designing an IDE for Probabilistic Programming: Challenges and a Prototype. NeurIPS Workshop on Probabilistic Programming. 2014 WorkshopDemo
    Also presented at NeurIPS 2014 as a demo.
    PDFDemoPoster, BibTex ]
    @inproceedings{ppl-ide:probprog14,
      author    = {Sameer Singh and Sebastian Riedel and Luke Hewitt and Tim Rocktaschel},
      title     = { {Designing an IDE for Probabilistic Programming: Challenges and a Prototype} },
      booktitle = {NeurIPS Workshop on Probabilistic Programming},
      year      = {2014},
    }
  • Xiao LingSameer SinghDan Weld.Context Representation for Named Entity Linking. Pacific Northwest Regional NLP Workshop (NW-NLP). 2014 Workshop
    PDF, BibTex ]
    @inproceedings{context:nwnlp14,
      author    = {Xiao Ling and Sameer Singh and Dan Weld},
      title     = { {Context Representation for Named Entity Linking} },
      booktitle = {Pacific Northwest Regional NLP Workshop (NW-NLP)},
      year      = {2014},
    }
  • Victoria (Xi) LinSameer SinghLuheng HeBen TaskarLuke Zettlemoyer.Multi-label Learning with Posterior Regularization. NeurIPS Workshop on Modern Machine Learning and Natural Language Processing. 2014 Workshop
    Also presented at the Pacific Northwest Regional NLP Workshop (NW-NLP) 2014.
    PDF, BibTex ]
    @inproceedings{prlr:mmlnlp14,
      author    = {Victoria (Xi) Lin and Sameer Singh and Luheng He and Ben Taskar and Luke Zettlemoyer},
      title     = { {Multi-label Learning with Posterior Regularization} },
      booktitle = {NeurIPS Workshop on Modern Machine Learning and Natural Language Processing},
      year      = {2014},
    }
  • Tim RocktaschelSameer SinghMatko BosnjakSebastian Riedel.Low-dimensional Embeddings of Logic. ACL 2014 Workshop on Semantic Parsing (SP14). 2014 Workshop
    Exceptional Submission Award
    Also presented at StarAI 2014 with minor changes.
    PDFPoster, BibTex ]
    @inproceedings{logic:sp14,
      author    = {Tim Rocktaschel and Sameer Singh and Matko Bosnjak and Sebastian Riedel},
      title     = { {Low-dimensional Embeddings of Logic} },
      booktitle = {ACL 2014 Workshop on Semantic Parsing (SP14)},
      year      = {2014},
    }
  • Sebastian RiedelSameer SinghVivek SrikumarTim RocktaschelLarysa VisengeriyevaJan Noessner.WOLFE: Strength Reduction and Approximate Programming for Probabilistic Programming. International Workshop on Statistical Relational AI (StarAI). 2014 Workshop
    Also presented at NeurIPS Probabilistic Programming Workshop.
    PDFWebsitePoster, BibTex ]
    @inproceedings{wolfe:starai14,
      author    = {Sebastian Riedel and Sameer Singh and Vivek Srikumar and Tim Rocktaschel and Larysa Visengeriyeva and Jan Noessner},
      title     = { {WOLFE: Strength Reduction and Approximate Programming for Probabilistic Programming} },
      booktitle = {International Workshop on Statistical Relational AI (StarAI)},
      year      = {2014},
    }
2013
  • Fabian M. SuchanekSameer SinghSebastian RiedelPartha P. Talukdar.AKBC 2013: Third Workshop on Automated Knowledge Base Construction. ACM Conference of Information and Knowledge Management (CIKM). 2013 Conference
    PDFACM DL, Abstract, BibTex ]
    The AKBC 2013 workshop aims to be a venue of excellence and vision in the area of knowledge base construction. This year's workshop will feature keynotes by ten leading researchers in the field, including from Google, Microsoft, Stanford, and CMU. The submissions focus on visionary ideas instead of on experimental evaluation. Nineteen accepted papers will be presented as posters, with nine exceptional papers also highlighted as spotlight talks. Thereby, the workshop aims to provide a vivid forum of discussion about the field of automated knowledge base construction.
    @inproceedings{akbc13,
      author    = {Fabian M. Suchanek and Sameer Singh and Sebastian Riedel and Partha P. Talukdar},
      title     = { {AKBC 2013: Third Workshop on Automated Knowledge Base Construction} },
      booktitle = {ACM Conference of Information and Knowledge Management (CIKM)},
      year      = {2013},
    }
  • Sameer SinghThore Graepel.Automated Probabilistic Modeling for Relational Data. ACM Conference of Information and Knowledge Management (CIKM). 2013 Conference
    PDFMSR Page, Abstract, BibTex ]
    Probabilistic graphical model representations of relational data provide a number of desired features, such as inference of missing values, detection of errors, visualization of data, and probabilistic answers to relational queries. However, adoption has been slow due to the high level of expertise expected both in probability and in the domain from the user. Instead of requiring a domain expert to specify the probabilistic dependencies of the data, we present an approach that uses the relational DB schema to automatically construct a Bayesian graphical model for a database. This resulting model contains customized distributions for the attributes, latent variables that cluster the records, and factors that reflect and represent the foreign key links, whilst allowing efficient inference. Experiments demonstrate the accuracy of the model and scalability of inference on synthetic and real-world data.
    @inproceedings{cikm13,
      author    = {Sameer Singh and Thore Graepel},
      title     = { {Automated Probabilistic Modeling for Relational Data} },
      booktitle = {ACM Conference of Information and Knowledge Management (CIKM)},
      year      = {2013},
    }
  • Sameer SinghLimin YaoDavid BelangerAri KobrenSam AnzarootMichael WickAlexandre PassosHarshal PandyaJinho ChoiBrian MartinAndrew McCallum.Universal Schema for Slot Filling and Cold Start: UMass IESL at TACKBP 2013. Text Analysis Conference on Knowledge Base Population (TAC KBP). 2013 Conference
    PDF, Abstract, BibTex ]
    We employ universal schema for the TAC KBP slot filling and cold start tasks. The technique enlarges the set of relations in an ontology, e.g., TACKBP slots, to contain all surface patterns between pairs of entities in a large corpus. By factorizing the matrix of co-occurrences between entity pairs and universal schema relations, we are able to predict new target slots. This differs fundamentally from traditional relation extraction approaches because an entire knowledge base is constructed jointly over train and test data. To produce submissions for the slot filling and cold start tasks, we simply query this knowledge base. We describe universal schema, our data preprocessing pipeline, and additional techniques we employ for predicting entities' attributes.
    @inproceedings{kbp13,
      author    = {Sameer Singh and Limin Yao and David Belanger and Ari Kobren and Sam Anzaroot and Michael Wick and Alexandre Passos and Harshal Pandya and Jinho Choi and Brian Martin and Andrew McCallum},
      title     = { {Universal Schema for Slot Filling and Cold Start: UMass IESL at TACKBP 2013} },
      booktitle = {Text Analysis Conference on Knowledge Base Population (TAC KBP)},
      year      = {2013},
    }
  • Jiaping ZhengLuke VilnisSameer SinghJinho ChoiAndrew McCallum.Dynamic Knowledge-Base Alignment for Coreference Resolution. Conference on Computational Natural Language Learning (CoNLL). 2013 Conference
    PDF, Abstract, BibTex ]
    Coreference resolution systems can benefit greatly from inclusion of global context, and a number of recent approaches have demonstrated improvements when precomputing an alignment to external knowledge sources. However, since alignment itself is a challenging task and is often noisy, existing systems either align conservatively, resulting in very few links, or combine the attributes of multiple candidates, leading to a conflation of entities. Our approach instead performs joint inference between within-document coreference and entity linking, maintaining ranked lists of candidate entities that are dynamically merged and reranked during inference. Further, we incorporate a large set of surface string variations for each entity by using anchor texts from the web that link to the entity. These forms of global context enables our system to improve classifier-based coreference by 1.09 B3 F1 points, and improve over the previous state-of-art by 0.41 points, thus introducing a new state-of-art result on the ACE 2004 data.
    @inproceedings{conll13,
      author    = {Jiaping Zheng and Luke Vilnis and Sameer Singh and Jinho Choi and Andrew McCallum},
      title     = { {Dynamic Knowledge-Base Alignment for Coreference Resolution} },
      booktitle = {Conference on Computational Natural Language Learning (CoNLL)},
      year      = {2013},
    }
  • Sameer SinghSebastian RiedelBrian MartinJiaping ZhengAndrew McCallum.Joint Inference of Entities, Relations, and Coreference. CIKM Workshop on Automated Knowledge Base Construction (AKBC). 2013 Workshop
    PDF, BibTex ]
    @inproceedings{jnt:akbc13,
      author    = {Sameer Singh and Sebastian Riedel and Brian Martin and Jiaping Zheng and Andrew McCallum},
      title     = { {Joint Inference of Entities, Relations, and Coreference} },
      booktitle = {CIKM Workshop on Automated Knowledge Base Construction (AKBC)},
      year      = {2013},
    }
  • Michael WickSameer SinghAri KobrenAndrew McCallum.Assessing Confidence of Knowledge Base Content with an Experimental Study in Entity Resolution. CIKM Workshop on Automated Knowledge Base Construction (AKBC). 2013 Workshop
    PDF, BibTex ]
    @inproceedings{conf:akbc13,
      author    = {Michael Wick and Sameer Singh and Ari Kobren and Andrew McCallum},
      title     = { {Assessing Confidence of Knowledge Base Content with an Experimental Study in Entity Resolution} },
      booktitle = {CIKM Workshop on Automated Knowledge Base Construction (AKBC)},
      year      = {2013},
    }
  • Sameer SinghSebastian RiedelAndrew McCallum.Anytime Belief Propagation Using Sparse Domains. Neural Information Processing Systems (NeurIPS) Workshop on Resource Efficient Machine Learning. 2013 Workshop
    [ PDF, arXiv, Abstract, BibTex ]
    Belief Propagation has been widely used for marginal inference, however it is slow on problems with large-domain variables and high-order factors. Previous work provides useful approximations to facilitate inference on such models, but lacks important anytime properties such as: 1) providing accurate and consistent marginals when stopped early, 2) improving the approximation when run longer, and 3) converging to the fixed point of BP. To this end, we propose a message passing algorithm that works on sparse (partially instantiated) domains, and converges to consistent marginals using dynamic message scheduling. The algorithm grows the sparse domains incrementally, selecting the next value to add using prioritization schemes based on the gradients of the marginal inference objective. Our experiments demonstrate local anytime consistency and fast convergence, providing significant speedups over BP to obtain low-error marginals: up to 25 times on grid models, and up to 6 times on a real-world natural language processing task.
    @inproceedings{sparse:reseff13,
      author    = {Sameer Singh and Sebastian Riedel and Andrew McCallum},
      title     = { {Anytime Belief Propagation Using Sparse Domains} },
      booktitle = {Neural Information Processing Systems (NeurIPS) Workshop on Resource Efficient Machine Learning},
      year      = {2013},
    }
2012
  • Sameer SinghMichael WickAndrew McCallum.Monte Carlo MCMC: Efficient Inference by Approximate Sampling. Empirical Methods in Natural Language Processing (EMNLP). 2012 Conference
    [ PDF, ACL Anthology, BibTex ]
    @inproceedings{mcmcmc:emnlp12,
      author    = {Sameer Singh and Michael Wick and Andrew McCallum},
      title     = { {Monte Carlo MCMC: Efficient Inference by Approximate Sampling} },
      booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
      year      = {2012},
    }
  • Michael WickSameer SinghAndrew McCallum.A Discriminative Hierarchical Model for Fast Coreference at Large Scale. Association for Computational Linguistics (ACL). 2012 Conference
    PDFACL Anthology, BibTex ]
    @inproceedings{hcoref:acl12,
      author    = {Michael Wick and Sameer Singh and Andrew McCallum},
      title     = { {A Discriminative Hierarchical Model for Fast Coreference at Large Scale} },
      booktitle = {Association for Computational Linguistics (ACL)},
      year      = {2012},
    }
  • Sameer SinghGregory DruckAndrew McCallum.Constraint-Driven Training of Complex Models Using MCMC. Technical Report, University of Massachusetts Amherst, CMPSCI UM-CS-2012-032. 2012 TechReport
    PDF, BibTex ]
    @techreport{mcmcge:tr2012,
      author      = {Sameer Singh and Gregory Druck and Andrew McCallum},
      title       = { {Constraint-Driven Training of Complex Models Using MCMC} },
      institution = {University of Massachusetts Amherst},
      type        = {Technical Report},
      number      = {CMPSCI UM-CS-2012-032},
      year        = {2012},
    }
  • Sameer SinghThore Graepel.Compiling Relational Database Schemata into Probabilistic Graphical Models. NeurIPS Workshop on Probabilistic Programming. 2012 Workshop
    [ PDF, arXiv, Website, BibTex ]
    @inproceedings{mldb:probprog12,
      author    = {Sameer Singh and Thore Graepel},
      title     = { {Compiling Relational Database Schemata into Probabilistic Graphical Models} },
      booktitle = {NeurIPS Workshop on Probabilistic Programming},
      year      = {2012},
    }
  • Sameer SinghMichael WickAndrew McCallum.Monte Carlo MCMC: Efficient Inference by Sampling Factors. NAACL/HLT Workshop on Automated Knowledge Base Construction (AKBC-WEKEX). 2012 Workshop
    PDF, BibTex ]
    @inproceedings{mcmcmc:akbc12,
      author    = {Sameer Singh and Michael Wick and Andrew McCallum},
      title     = { {Monte Carlo MCMC: Efficient Inference by Sampling Factors} },
      booktitle = {NAACL/HLT Workshop on Automated Knowledge Base Construction (AKBC-WEKEX)},
      year      = {2012},
    }
2011
  • Jeremy KubicaSameer SinghDaria Sorokina.Parallel Large-scale Feature Selection. Scaling Up Machine Learning, Cambridge University Press. 2011 Chapter
    [ PDF, Details, Publisher, Amazon, BibTex ]
    @incollection{parfs:suml11,
      author    = {Jeremy Kubica and Sameer Singh and Daria Sorokina},
      title     = { {Parallel Large-scale Feature Selection} },
      booktitle = {Scaling Up Machine Learning},
      publisher = {Cambridge University Press},
      year      = {2011},
    }
  • Sameer SinghAmarnag SubramanyaFernando PereiraAndrew McCallum.Large-Scale Cross-Document Coreference Using Distributed Inference and Hierarchical Models. Association for Computational Linguistics (ACL). 2011 Conference
    Best Talk Award at DARPA Machine Reading Project Kickoff.
    PDF, BibTex ]
    @inproceedings{dcoref:acl11,
      author    = {Sameer Singh and Amarnag Subramanya and Fernando Pereira and Andrew McCallum},
      title     = { {Large-Scale Cross-Document Coreference Using Distributed Inference and Hierarchical Models} },
      booktitle = {Association for Computational Linguistics (ACL)},
      year      = {2011},
    }
  • Sameer SinghAndrew McCallum.Towards Asynchronous Distributed MCMC Inference for Large Graphical Models. Neural Information Processing Systems (NeurIPS) Workshop on Big Learning. 2011 Workshop
    PDF, BibTex ]
    @inproceedings{asyncmcmc:biglearn11,
      author    = {Sameer Singh and Andrew McCallum},
      title     = { {Towards Asynchronous Distributed MCMC Inference for Large Graphical Models} },
      booktitle = {Neural Information Processing Systems (NeurIPS) Workshop on Big Learning},
      year      = {2011},
    }
  • Sameer SinghBrian MartinAndrew McCallum.Inducing Value Sparsity for Parallel Inference in Tree-shaped Models. Neural Information Processing Systems (NeurIPS) Workshop on Computational Trade-offs in Statistical Learning (COST). 2011 Workshop
    PDF, BibTex ]
    @inproceedings{sparsebp:cost11,
      author    = {Sameer Singh and Brian Martin and Andrew McCallum},
      title     = { {Inducing Value Sparsity for Parallel Inference in Tree-shaped Models} },
      booktitle = {Neural Information Processing Systems (NeurIPS) Workshop on Computational Trade-offs in Statistical Learning (COST)},
      year      = {2011},
    }
2010
  • Sameer SinghLimin YaoSebastian RiedelAndrew McCallum.Constraint-Driven Rank-Based Learning for Information Extraction. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2010 Conference
    PDFACL Anthology, Abstract, BibTex ]
    Most learning algorithms for factor graphs require complete inference over the dataset or an instance before making an update to the parameters. SampleRank is a rank-based learning framework that alleviates this problem by updating the parameters during inference. Most semi-supervised learning algorithms also rely on the complete inference, i.e. calculating expectations or MAP configurations. We extend the SampleRank framework to the semi-supervised learning, avoiding these inference bottlenecks. Different approaches for incorporating unlabeled data and prior knowledge into this framework are explored. We evaluated our method on a standard information extraction dataset. Our approach outperforms the supervised method significantly and matches the result of the competing semi-supervised learning approach.
    @inproceedings{cons:naacl10,
      author    = {Sameer Singh and Limin Yao and Sebastian Riedel and Andrew McCallum},
      title     = { {Constraint-Driven Rank-Based Learning for Information Extraction} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year      = {2010},
    }
  • Sameer SinghDustin HillardChris Leggetter.Minimally-Supervised Extraction of Entities from Text Advertisements. Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL). 2010 Conference
    PDFACL Anthology, Abstract, BibTex ]
    Extraction of entities from ad creatives is an important problem that can benefit many computational advertising tasks. Supervised and semi-supervised solutions rely on labeled data which is expensive, time consuming, and difficult to procure for ad creatives. A small set of manually derived constraints on feature expectations over unlabeled data can be used to *partially* and *probabilistically* label large amounts of data. Utilizing recent work in constraint-based semi-supervised learning, this paper injects light weight supervision specified as these ``constraints'' into a semi-Markov conditional random field model of entity extraction in ad creatives. Relying solely on the constraints, the model is trained on a set of unlabeled ads using an online learning algorithm. We demonstrate significant accuracy improvements on a manually labeled test set as compared to a baseline dictionary approach. We also achieve accuracy that approaches a fully supervised classifier.
    @inproceedings{min:naacl10,
      author    = {Sameer Singh and Dustin Hillard and Chris Leggetter},
      title     = { {Minimally-Supervised Extraction of Entities from Text Advertisements} },
      booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
      year      = {2010},
    }
  • Sameer SinghMichael WickAndrew McCallum.Distantly Labeling Data for Large Scale Cross-Document Coreference. Technical Report, Computing Research Repository (CoRR) eprint arXiv:1005.4298. 2010 TechReport
    PDFarXiv, Abstract, BibTex ]
    Cross-document coreference, the problem of resolving entity mentions across multi-document collections, is crucial to automated knowledge base construction and data mining tasks. However, the scarcity of large labeled data sets has hindered supervised machine learning research for this task. In this paper we develop and demonstrate an approach based on ``distantly-labeling'' a data set from which we can train a discriminative cross-document coreference model. In particular we build a dataset of more than a million people mentions extracted from 3.5 years of New York Times articles, leverage Wikipedia for distant labeling with a generative model (and measure the reliability of such labeling); then we train and evaluate a conditional random field coreference model that has factors on cross-document entities as well as mention-pairs. This coreference model obtains high accuracy in resolving mentions and entities that are not present in the training data, indicating applicability to non-Wikipedia data. Given the large amount of data, our work is also an exercise demonstrating the scalability of our approach.
    @techreport{distantly:tr10,
      author      = {Sameer Singh and Michael Wick and Andrew McCallum},
      title       = { {Distantly Labeling Data for Large Scale Cross-Document Coreference} },
      institution = {Computing Research Repository (CoRR)},
      number      = {arXiv:1005.4298},
      year        = {2010},
    }
  • Sameer SinghAmarnag SubramanyaFernando PereiraAndrew McCallum.Distributed MAP Inference for Undirected Graphical Models. Neural Information Processing Systems (NeurIPS) Workshop on Learning on Cores, Clusters, and Clouds (LCCC). 2010 Workshop
    PDFVideo, Abstract, BibTex ]
    In this work, we distribute the MCMC-based MAP inference using the Map-Reduce framework. The variables are assigned randomly to machines, which leads to some factors that neighbor variables on separate machines. Parallel MCMC-chains are initiated using proposal distributions that only suggest local changes such that factors that lie across machines are not examined. After a fixed number of samples on each machine, we redistribute the variables amongst the machines to enable proposals across variables that were on different machines. To demonstrate the distribution strategy on a real-world information extraction application, we model the task of cross-document coreference.
    @inproceedings{distmap:lccc10,
      author    = {Sameer Singh and Amarnag Subramanya and Fernando Pereira and Andrew McCallum},
      title     = { {Distributed MAP Inference for Undirected Graphical Models} },
      booktitle = {Neural Information Processing Systems (NeurIPS) Workshop on Learning on Cores, Clusters, and Clouds (LCCC)},
      year      = {2010},
    }
2009
  • Michael WickKhashyar RohanimaneshSameer SinghAndrew McCallum.Training Factor Graphs with Reinforcement Learning for Efficient MAP Inference. Neural Information Processing Systems (NeurIPS). 2009 Conference
    PDF, BibTex ]
    @inproceedings{rlmap:nips09,
      author    = {Michael Wick and Khashyar Rohanimanesh and Sameer Singh and Andrew McCallum},
      title     = { {Training Factor Graphs with Reinforcement Learning for Efficient MAP Inference} },
      booktitle = {Neural Information Processing Systems (NeurIPS)},
      year      = {2009},
    }
  • Andrew McCallumKarl SchultzSameer Singh.FACTORIE: Probabilistic Programming via Imperatively Defined Factor Graphs. Neural Information Processing Systems (NeurIPS). 2009 Conference
    PDF, BibTex ]
    @inproceedings{factorie:nips09,
      author    = {Andrew McCallum and Karl Schultz and Sameer Singh},
      title     = { {FACTORIE: Probabilistic Programming via Imperatively Defined Factor Graphs} },
      booktitle = {Neural Information Processing Systems (NeurIPS)},
      year      = {2009},
    }
  • Sameer SinghKarl SchultzAndrew McCallum.Bi-directional Joint Inference for Entity Resolution and Segmentation using Imperatively-Defined Factor Graphs. Machine Learning and Knowledge Discovery in Databases (Lecture Notes in Computer Science) and European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML PKDD). 2009 Conference
    PDFVideo, BibTex ]
    @inproceedings{bidirectional:ecml09,
      author    = {Sameer Singh and Karl Schultz and Andrew McCallum},
      title     = { {Bi-directional Joint Inference for Entity Resolution and Segmentation using Imperatively-Defined Factor Graphs} },
      booktitle = {Machine Learning and Knowledge Discovery in Databases (Lecture Notes in Computer Science) and European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML PKDD)},
      year      = {2009},
    }
  • Sameer SinghJeremy KubicaScott E. LarsenDaria Sorokina.Parallel Large Scale Feature Selection for Logistic Regression. SIAM International Conference on Data Mining (SDM). 2009 Conference
    PDF, BibTex ]
    @inproceedings{parallel:sdm09,
      author    = {Sameer Singh and Jeremy Kubica and Scott E. Larsen and Daria Sorokina},
      title     = { {Parallel Large Scale Feature Selection for Logistic Regression} },
      booktitle = {SIAM International Conference on Data Mining (SDM)},
      year      = {2009},
    }
  • Sameer Singh.Option Discovery in Hierarchical Reinforcement Learning for Training Large Factor Graphs for Information Extraction. University of Massachusetts Amherst, PhD Candidacy/Synthesis Report. 2009 Report
    Readers: Andy Barto and Andrew McCallum
    PDF, BibTex ]
    @misc{option:synth09,
      author       = {Sameer Singh},
      title        = { {Option Discovery in Hierarchical Reinforcement Learning for Training Large Factor Graphs for Information Extraction} },
      howpublished = {University of Massachusetts Amherst, PhD Candidacy/Synthesis Report},
      year         = {2009},
    }
2008
  • Sameer SinghE. S. LarsenJeremy KubicaAndrew W. Moore.Feature selection for large scale models. US Patent Number 8190537. 2008 Patent
    PDFWebpage, BibTex ]
    @misc{feature:patent08,
      author       = {Sameer Singh and E. S. Larsen and Jeremy Kubica and Andrew W. Moore},
      title        = { {Feature selection for large scale models} },
      howpublished = {US Patent Number 8190537},
      year         = {2008},
    }
  • Khashyar RohanimaneshMichael WickSameer SinghAndrew McCallum.Reinforcement Learning for MAP Inference in Large Factor Graphs. Technical Report, University of Massachusetts Amherst, CMPSCI UM-CS-2008-040. 2008 TechReport
    PDF, BibTex ]
    @techreport{rlmap:tr08,
      author      = {Khashyar Rohanimanesh and Michael Wick and Sameer Singh and Andrew McCallum},
      title       = { {Reinforcement Learning for MAP Inference in Large Factor Graphs} },
      institution = {University of Massachusetts Amherst},
      type        = {Technical Report},
      number      = {CMPSCI UM-CS-2008-040},
      year        = {2008},
    }
  • Andrew McCallumKhashyar RohanimaneshMichael WickKarl SchultzSameer Singh.FACTORIE: Efficient Probabilistic Programming via Imperative Declarations of Structure, Inference and Learning. NeurIPS Workshop on Probabilistic Programming. 2008 Workshop
    PDF, BibTex ]
    @inproceedings{factorie:nipsws08,
      author    = {Andrew McCallum and Khashyar Rohanimanesh and Michael Wick and Karl Schultz and Sameer Singh},
      title     = { {FACTORIE: Efficient Probabilistic Programming via Imperative Declarations of Structure, Inference and Learning} },
      booktitle = {NeurIPS Workshop on Probabilistic Programming},
      year      = {2008},
    }
2007
  • S.R. SchachT.O.S. AdeshiyanD. BalasubramanianG. MadlE.P. OssesSameer SinghK. SuwanmongkolM. XieD.G. Feitelson.Common Coupling and Pointer Variables, with Application to a Linux Case Study. Software Quality Journal (SQJ). 2007 Journal
    PDF, BibTex ]
    @article{sqj07,
      author  = {S. R. Schach and T. O. S. Adeshiyan and D. Balasubramanian and G. Madl and E. P. Osses and Sameer Singh and K. Suwanmongkol and M. Xie and D. G. Feitelson},
      title   = { {Common Coupling and Pointer Variables, with Application to a Linux Case Study} },
      journal = {Software Quality Journal (SQJ)},
      volume  = {15},
      year    = {2007},
    }
  • D.G. FeitelsonT.O.S. AdeshiyanD. BalasubramanianY. EtsionG. MadlE.P. OssesSameer SinghK. SuwanmongkolM. XieS.R. Schach.Fine-Grain Analysis of Common Coupling and its Application to a Linux Case Study. Journal of Systems and Software (JSS). 2007 Journal
    PDF, BibTex ]
    @article{jss07,
      author  = {D. G. Feitelson and T. O. S. Adeshiyan and D. Balasubramanian and Y. Etsion and G. Madl and E. P. Osses and Sameer Singh and K. Suwanmongkol and M. Xie and S. R. Schach},
      title   = { {Fine-Grain Analysis of Common Coupling and its Application to a Linux Case Study} },
      journal = {Journal of Systems and Software (JSS)},
      volume  = {80},
      year    = {2007},
    }
  • T. KichkayloC. van BuskirkSameer SinghH. NeemaM. OroszR. Neches.Mixed-Initiative Planning for Space Exploration Missions. International Conference on Automated Planning and Scheduling Workshop (ICAPS). 2007 Workshop
    [ BibTex ]
    @inproceedings{icaps07,
      author    = {T. Kichkaylo and C. van Buskirk and Sameer Singh and H. Neema and M. Orosz and R. Neches},
      title     = { {Mixed-Initiative Planning for Space Exploration Missions} },
      booktitle = {International Conference on Automated Planning and Scheduling Workshop (ICAPS)},
      year      = {2007},
    }
2006
  • Sameer SinghJulie A. Adams.Transfer of Learning for Complex Domains: A Demonstration Using Multiple Robots. International Conference on Robotics and Automation (ICRA). 2006 Conference
    PDF, BibTex ]
    @inproceedings{icra06,
      author    = {Sameer Singh and Julie A. Adams},
      title     = { {Transfer of Learning for Complex Domains: A Demonstration Using Multiple Robots} },
      booktitle = {International Conference on Robotics and Automation (ICRA)},
      year      = {2006},
    }
2003
  • Sameer Singh.Finding the shortest path for a mobile robot in an unmapped maze from minimum runs. Int Conf on CAD, CAM, Robotics and Autonomous Factories (INCARF). 2003 Conference
    [ BibTex ]
    @inproceedings{incarf03,
      author    = {Sameer Singh},
      title     = { {Finding the shortest path for a mobile robot in an unmapped maze from minimum runs} },
      booktitle = {Int Conf on CAD, CAM, Robotics and Autonomous Factories (INCARF)},
      year      = {2003},
    }