Skip to content

Commit

Permalink
Update attachment masking
Browse files Browse the repository at this point in the history
Always start with the unmasked attachment body when applying the masks
and censor rules. We're going to make changes which mean
`FoiAttachment#body` will return masked body, fetch this with
`default_body` could result in attachment with old or superseded censor
rules still applied.

Needed to force HTML attachment into CRLF line endings due to an issue
with the mail gem which results in a different hexdigest after
rebuilding the raw emails from a file on disk in the specs.

Once mikel/mail#1512 is merged we can revert the
FoiAttachment factory change.
  • Loading branch information
gbp committed Aug 3, 2023
1 parent 9974cf0 commit 321dfe9
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 2 deletions.
2 changes: 1 addition & 1 deletion app/jobs/foi_attachment_mask_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def perform(attachment)
@attachment = attachment

body = AlaveteliTextMasker.apply_masks(
attachment.default_body,
attachment.unmasked_body,
attachment.content_type,
masks
)
Expand Down
10 changes: 10 additions & 0 deletions app/models/foi_attachment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class FoiAttachment < ApplicationRecord

belongs_to :incoming_message,
inverse_of: :foi_attachments
has_one :raw_email, through: :incoming_message, source: :raw_email

has_one_attached :file, service: :attachments

Expand Down Expand Up @@ -123,6 +124,15 @@ def default_body
text_type? ? body_as_text.string : body
end

# return the body as it is in the raw email, unmasked without censor rules
# applied
def unmasked_body
MailHandler.attachment_body_for_hexdigest(
raw_email.mail,
hexdigest: hexdigest
)
end

def main_body_part?
self == incoming_message.get_main_body_text_part
end
Expand Down
7 changes: 7 additions & 0 deletions lib/mail_handler/backends/mail_backend.rb
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,13 @@ def get_attachment_attributes(mail)
end
end

def attachment_body_for_hexdigest(mail, hexdigest:)
attributes = get_attachment_attributes(mail).find do |attrs|
attrs[:hexdigest] == hexdigest
end
attributes&.fetch(:body)
end

# Format
def address_from_name_and_email(name, email)
unless MySociety::Validate.is_valid_email(email)
Expand Down
1 change: 1 addition & 0 deletions spec/controllers/attachments_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def show(params = {})
file_name: attachment.display_filename
}
default_params[:id] = info_request.id unless params[:public_token]
rebuild_raw_emails(info_request)
get :show, params: default_params.merge(params)
end

Expand Down
9 changes: 8 additions & 1 deletion spec/factories/foi_attchments.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,14 @@
factory :html_attachment do
content_type { 'text/html' }
filename { 'interesting.html' }
body { load_file_fixture('interesting.html') }
body {
# Needed to force HTML attachment into CRLF line endings due to an issue
# with the mail gem which results in a different hexdigest after
# rebuilding the raw emails.
# Once https://github.com/mikel/mail/pull/1512 is merged we can revert
# the FoiAttachment factory change.
Mail::Utilities.to_crlf(load_file_fixture('interesting.html'))
}
end
factory :jpeg_attachment do
content_type { 'image/jpeg' }
Expand Down
2 changes: 2 additions & 0 deletions spec/integration/prominence/viewing_raw_attachment_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

let(:within_session) do
-> {
rebuild_raw_emails(info_request)

visit get_attachment_url(
incoming_message_id: attachment.incoming_message_id,
part: attachment.url_part_number,
Expand Down
2 changes: 2 additions & 0 deletions spec/integration/view_request_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
info_request = FactoryBot.create(:info_request_with_incoming_attachments)
incoming_message = info_request.incoming_messages.first
attachment_url = "/es/request/#{info_request.id}/response/#{incoming_message.id}/attach/2/interesting.pdf"
rebuild_raw_emails(info_request)

using_session(non_owner) { visit(attachment_url) }
expect(cache_directories_exist?(info_request)).to be true

Expand Down
2 changes: 2 additions & 0 deletions spec/jobs/foi_attachment_mask_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
let(:attachment) { incoming_message.foi_attachments.last }
let(:body) { described_class.new.perform(attachment) }

before { rebuild_raw_emails(info_request) }

it 'sanitises HTML attachments' do
# Nokogiri adds the meta tag; see
# https://github.com/sparklemotion/nokogiri/issues/1008
Expand Down
25 changes: 25 additions & 0 deletions spec/lib/mail_handler/backends/mail_backend_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -406,4 +406,29 @@
to eq(['[email protected]'])
end
end

describe 'attachment_body_for_hexdigest' do
let(:mail) do
Mail.new do
add_file filename: 'file.txt', content: 'hereisthetext'
end
end

context 'matching hexdigest' do
it 'returns the body of the attachment' do
body = attachment_body_for_hexdigest(
mail, hexdigest: Digest::MD5.hexdigest('hereisthetext')
)

expect(body).to eq('hereisthetext')
end
end

context 'non-matching hexdigest' do
it 'returns nil' do
body = attachment_body_for_hexdigest(mail, hexdigest: 'incorrect')
expect(body).to be_nil
end
end
end
end
15 changes: 15 additions & 0 deletions spec/models/foi_attachment_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,21 @@

end

describe '#unmasked_body' do

it 'returns the attachment body from the raw email' do
foi_attachment = FactoryBot.build(:body_text)

allow(foi_attachment).to receive(:raw_email).
and_return(double.as_null_object)
allow(MailHandler).to receive(:attachment_body_for_hexdigest).
and_return('hereistheunmaskedtext')

expect(foi_attachment.unmasked_body).to eq('hereistheunmaskedtext')
end

end

describe '#main_body_part?' do
subject { attachment.main_body_part? }

Expand Down
19 changes: 19 additions & 0 deletions spec/support/email_helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,22 @@ def gsub_addresses(content, **kargs)
end
content
end

def rebuild_raw_emails(info_request)
info_request.incoming_messages.each do |im|
mail = Mail.new

mail.to = info_request.incoming_email
mail.from = "#{im.from_name} <#{im.from_email}>"
mail.subject = im.subject
mail.date = im.sent_at
mail.body = im.cached_main_body_text_unfolded

im.foi_attachments.each do |a|
mail.add_file filename: a.filename, content: a.file.download
end

im.raw_email.data = mail
im.raw_email.save!
end
end

0 comments on commit 321dfe9

Please sign in to comment.