Skip to main content

I'm looking at parsing out just the most recent reply/message from an email thread as part of a zap.

I've found this link, but how do I use it within a Zap? https://github.com/zapier/email-reply-parser

i.e. when I pick up a thread from gmail how do I just extract the most recent message?

Is this possible in Code by Zapier and if so how?

E.g.

Input:

Yes that is fine, I will email you in the morning.

On Fri, Nov 16, 2012 at 1:48 PM, Zapier wrote:

Our support team just commented on your open Ticket: "Hi Royce, can we chat in the morning about your question?"

Output, i.e. the parsed email:

Yes that is fine, I will email you in the morning.

Hi @taylornd ,

Thanks for reaching out! I’m tagging in one of our code gurus @ikbelkirasan to see if they may have a suggestion for you here. 

@ikbelkirasan thank you for any guidance you may be able to share!


@Liz_Roberts - Thanks for the mention! I’ll try and help :)

@taylornd Try using the code snippet from this gist to extract the reply fragment.

 


Brilliant. Thanks.


I had a go at adapting this https://github.com/zapier/email-reply-parser which seemed to work as well.

"""
email_reply_parser is a python library port of GitHub's Email Reply Parser.

For more information, visit https://github.com/zapier/email-reply-parser
"""

import re


class EmailReplyParser(object):
    """ Represents a email message that is parsed.

    Thin facade over EmailMessage: both entry points are static, so no
    instance of this class is ever needed.
    """

    @staticmethod
    def read(text):
        """ Factory method that splits email into list of fragments

        text - A string email body

        Returns an EmailMessage instance
        """
        return EmailMessage(text).read()

    @staticmethod
    def parse_reply(text):
        """ Provides the reply portion of email.

        text - A string email body

        Returns reply body message
        """
        return EmailReplyParser.read(text).reply


class EmailMessage(object):
    """ An email message represents a parsed email body.

    The body is scanned from the last line to the first and grouped into
    Fragment objects; each fragment is flagged as quoted, headers,
    signature and/or hidden.
    """

    SIG_REGEX = re.compile(r'(--|__|-\w)|(^Sent from my (\w+\s*))')
    QUOTE_HDR_REGEX = re.compile('On.*wrote:$')
    QUOTED_REGEX = re.compile(r'(>+)')
    HEADER_REGEX = re.compile(r'^\*?(From|Sent|To|Subject):\*? .+')
    _MULTI_QUOTE_HDR_REGEX = r'(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)'
    MULTI_QUOTE_HDR_REGEX = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL | re.MULTILINE)
    MULTI_QUOTE_HDR_REGEX_MULTILINE = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL)

    def __init__(self, text):
        """ Stores the email body with normalised line endings

        text - A string email body
        """
        self.fragments = []
        self.fragment = None
        self.text = text.replace('\r\n', '\n')
        self.found_visible = False

    def read(self):
        """ Creates new fragment for each line
        and labels as a signature, quote, or hidden.

        Returns EmailMessage instance
        """

        self.found_visible = False

        is_multi_quote_header = self.MULTI_QUOTE_HDR_REGEX_MULTILINE.search(self.text)
        if is_multi_quote_header:
            # Collapse an "On ... wrote:" header that was wrapped over several
            # lines into a single line so the per-line scan can recognise it.
            header = is_multi_quote_header.groups()[0].replace('\n', '')
            # A callable replacement is used so that backslashes in the header
            # text are inserted literally instead of being parsed as a
            # regex replacement template.
            self.text = self.MULTI_QUOTE_HDR_REGEX.sub(lambda _match: header, self.text)

        # Fix any outlook style replies, with the reply immediately above the signature boundary line
        # See email_2_2.txt for an example
        # NOTE(review): the "[" and the "{7,}" quantifier were missing from the
        # pasted pattern; restored to what the upstream library is believed to
        # use - confirm against upstream.
        # flags must be passed by keyword: the fourth positional argument of
        # re.sub is `count`, not `flags`.
        self.text = re.sub('([^\n])(?=\n ?[_-]{7,})', '\\1\n', self.text, flags=re.MULTILINE)

        # Scan bottom-up: lines are reversed here and the resulting fragment
        # list is reversed back once scanning is done.
        self.lines = self.text.split('\n')
        self.lines.reverse()

        for line in self.lines:
            self._scan_line(line)

        self._finish_fragment()

        self.fragments.reverse()

        return self

    @property
    def reply(self):
        """ Captures reply message within email

        Returns the content of every fragment that is neither hidden nor
        quoted, joined with newlines
        """
        return '\n'.join(
            f.content for f in self.fragments if not (f.hidden or f.quoted)
        )

    def _scan_line(self, line):
        """ Reviews each line in email message and determines fragment type

        line - a row of text from an email message
        """
        is_blank = not line.strip()
        is_quote_header = self.QUOTE_HDR_REGEX.match(line) is not None
        is_quoted = self.QUOTED_REGEX.match(line) is not None
        is_header = is_quote_header or self.HEADER_REGEX.match(line) is not None

        # A blank line closes the current fragment when the most recently
        # added line of that fragment looks like a signature marker.
        if self.fragment and is_blank:
            if self.SIG_REGEX.match(self.fragment.lines[-1].strip()):
                self.fragment.signature = True
                self._finish_fragment()

        # Extend the current fragment when the line has the same
        # headers/quoted flags, or when a quoted fragment is followed by its
        # quote header or by a blank line; otherwise start a new fragment.
        if self.fragment \
                and ((self.fragment.headers == is_header and self.fragment.quoted == is_quoted) or
                     (self.fragment.quoted and (is_quote_header or is_blank))):

            self.fragment.lines.append(line)
        else:
            self._finish_fragment()
            self.fragment = Fragment(is_quoted, line, headers=is_header)

    def quote_header(self, line):
        """ Determines whether line is part of a quoted area

        line - a row of the email message; it is reversed before being
               matched against QUOTE_HDR_REGEX

        Returns True or False
        """
        return self.QUOTE_HDR_REGEX.match(line[::-1]) is not None

    def _finish_fragment(self):
        """ Creates fragment

        Finalises the fragment being built (if any), decides whether it is
        hidden, appends it to self.fragments and clears self.fragment.
        """

        if self.fragment:
            self.fragment.finish()
            if self.fragment.headers:
                # Regardless of what's been seen to this point, if we encounter a headers fragment,
                # all the previous fragments should be marked hidden and found_visible set to False.
                self.found_visible = False
                for f in self.fragments:
                    f.hidden = True
            if not self.found_visible:
                if self.fragment.quoted \
                        or self.fragment.headers \
                        or self.fragment.signature \
                        or not self.fragment.content.strip():

                    self.fragment.hidden = True
                else:
                    self.found_visible = True
            self.fragments.append(self.fragment)
        self.fragment = None


class Fragment(object):
    """ A Fragment is a part of
    an Email Message, labeling each part.
    """

    def __init__(self, quoted, first_line, headers=False):
        """ Starts a fragment from its first scanned line

        quoted     - True when the fragment is quoted text
        first_line - the first line added to the fragment
        headers    - True when the fragment is a header block
        """
        self.signature = False
        self.headers = headers
        self.hidden = False
        self.quoted = quoted
        self._content = None
        self.lines = [first_line]

    def finish(self):
        """ Creates block of content with lines
        belonging to fragment.

        The collected lines are reversed before being joined, then the
        line list is dropped.
        """
        self.lines.reverse()
        self._content = '\n'.join(self.lines)
        self.lines = None

    @property
    def content(self):
        """ The joined fragment text with surrounding whitespace removed

        Only usable after finish() has been called; before that _content
        is None.
        """
        return self._content.strip()
return {'emailstring': EmailReplyParser.parse_reply(input_datat'body'])}