% Huang et al., "Gossip" (ASIA CCS '17) -- conference paper record.
% `address` holds the publisher's city; `location` holds the conference venue.
% The page range uses the ASCII double hyphen so the record stays pure ASCII.
@inproceedings{10.1145/3052973.3053017,
  author    = {Huang, Cheng and Hao, Shuang and Invernizzi, Luca and Liu, Jiayong and Fang, Yong and Kruegel, Christopher and Vigna, Giovanni},
  title     = {{Gossip}: Automatically Identifying Malicious Domains from Mailing List Discussions},
  booktitle = {Proceedings of the 2017 {ACM} on Asia Conference on Computer and Communications Security},
  series    = {ASIA CCS '17},
  year      = {2017},
  pages     = {494--505},
  numpages  = {12},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Abu Dhabi, United Arab Emirates},
  isbn      = {9781450349444},
  doi       = {10.1145/3052973.3053017},
  url       = {https://doi.org/10.1145/3052973.3053017},
  keywords  = {blacklists, malware detection, natural language processing},
  abstract  = {Domain names play a critical role in cybercrime, because they identify hosts that serve malicious content (such as malware, Trojan binaries, or malicious scripts), operate as command-and-control servers, or carry out some other role in the malicious network infrastructure. To defend against Internet attacks and scams, operators widely use blacklisting to detect and block malicious domain names and IP addresses. Existing blacklists are typically generated by crawling suspicious domains, manually or automatically analyzing malware, and collecting information from honeypots and intrusion detection systems. Unfortunately, such blacklists are difficult to maintain and are often slow to respond to new attacks. Security experts set up and join mailing lists to discuss and share intelligence information, which provides a better chance to identify emerging malicious activities. In this paper, we design Gossip, a novel approach to automatically detect malicious domains based on the analysis of discussions in technical mailing lists (particularly on security-related topics) by using natural language processing and machine learning techniques. We identify a set of effective features extracted from email threads, users participating in the discussions, and content keywords, to infer malicious domains from mailing lists, without the need to actually crawl the suspect websites. Our result shows that Gossip achieves high detection accuracy. Moreover, the detection from our system is often days or weeks earlier than existing public blacklists.},
}