Fix unicode errors generting diffs with git-p4
Review Request #12255 — Created April 25, 2022 and submitted
When using git-p4 as the underlying client with python 3 runtime, a UnicodeDecodeError is thrown if any of the files to be diffed contained non-ASCII text. This was because the `make_perforce_diff` function was using a mixture of bytes and str objects. The solution is to keep everything in bytes and never try to intepret the characters returned from the diff other than in very specific ways it always has.
This was a frequent error for us at work. I posted the solution to the issue tracker a long time ago, but I've since been unable to find the specific issue that was raised. I have been using this patch for at least 3 years.
Add a file named
test-Œ.txt
with contentsŒŒŒŒ
.
rbt post
that.You can see inthe
--debug
output that the commands were successful:>>> Running: p4 files //depot/MMA/core/main/test/test-Œ.txt@2399980 >>> Running: p4 files //depot/MMA/core/main/test/test.txt@2399980
I added
print()
statement and inspected the generated diff too:Unicode: --- //depot/MMA/core/main/test/test-Œ.txt //depot/MMA/core/main/test/test-Œ.txt#1 +++ //depot/MMA/core/main/test/test-Œ.txt TIMESTAMP @@ -0,0 +1 @@ +ŒŒŒŒ --- //depot/MMA/core/main/test/test.txt //depot/MMA/core/main/test/test.txt#1 +++ //depot/MMA/core/main/test/test.txt TIMESTAMP @@ -0,0 +1 @@ +ŒŒŒŒŒŒŒ b'Raw: --- //depot/MMA/core/main/test/test-\xc5\x92.txt\t//depot/MMA/core/main/test/test-\xc5\x92.txt#1\n+++ //depot/MMA/core/main/test/test-\xc5\x92.txt\tTIMESTAMP\n@@ -0,0 +1 @@\n+\xc5\x92\xc5\x92\xc5\x92\xc5\x92\n--- //depot/MMA/core/main/test/test.txt\t//depot/MMA/core/main/test/test.txt#1\n+++ //depot/MMA/core/main/test/test.txt\tTIMESTAMP\n@@ -0,0 +1 @@\n+\xc5\x92\xc5\x92\xc5\x92\xc5\x92\xc5\x92\xc5\x92\xc5\x92\n'
Funny enought he server version we're on just aborts the request, but i think the client behaved correctly:
>>> Command line: rbt post --debug -r 93808 HEAD >>> Running: git rev-parse HEAD >>> Running: git rev-parse 48ecac22ee23eb524754fd08b1c651584104786c^ >>> Running: git branch --remotes >>> Running: git rev-list 6e25de453e378d6f3a5927a5ed1d17b15a7517b9 --not --remotes=p4 >>> Making HTTP GET request to http://reviews.eu.fidessa.priv/api/review-requests/93808/?only-fields=absolute_url%2Cbugs_closed%2Cid%2Cstatus%2Cpublic&only-links=diffs%2Cdraft >>> Cached response for HTTP GET http://reviews.eu.fidessa.priv/api/review-requests/93808/?only-fields=absolute_url%2Cbugs_closed%2Cid%2Cstatus%2Cpublic&only-links=diffs%2Cdraft expired and was not modified >>> Running: git version >>> Running: git -c core.quotepath=false diff --no-color --no-prefix -r -u --no-ext-diff 6e25de453e378d6f3a5927a5ed1d17b15a7517b9..48ecac22ee23eb524754fd08b1c651584104786c >>> Running: git log 6e25de453e378d6f3a5927a5ed1d17b15a7517b9 >>> Running: p4 files //depot/MMA/core/main/test/test-Œ.txt@2399980 >>> Running: p4 files //depot/MMA/core/main/test/test.txt@2399980 >>> Making HTTP GET request to http://reviews.eu.fidessa.priv/api/validation/diffs/ >>> Cached response for HTTP GET http://reviews.eu.fidessa.priv/api/validation/diffs/ expired and was modified >>> Making HTTP POST request to http://reviews.eu.fidessa.priv/api/validation/diffs/ Traceback (most recent call last): File "/mma/users/benj/.local/bin/rbt", line 33, in <module> sys.exit(load_entry_point('RBTools', 'console_scripts', 'rbt')()) File "/mma/users/benj/proj/rbtools/rbtools/commands/main.py", line 207, in main command.run_from_argv([RB_MAIN, command_name] + args) File "/mma/users/benj/proj/rbtools/rbtools/commands/__init__.py", line 1043, in run_from_argv exit_code = self.main(*args) or 0 File "/mma/users/benj/proj/rbtools/rbtools/commands/post.py", line 1022, in main self._validate_squashed_diff(squashed_diff) File "/mma/users/benj/proj/rbtools/rbtools/commands/post.py", line 1383, in _validate_squashed_diff **validate_kwargs) File "/mma/users/benj/proj/rbtools/rbtools/api/decorators.py", line 27, in request_method *args, **kwargs) File "/mma/users/benj/proj/rbtools/rbtools/api/transport/sync.py", line 83, in execute_request_method return self._execute_request(request) File "/mma/users/benj/proj/rbtools/rbtools/api/transport/sync.py", line 92, in _execute_request rsp = self.server.make_request(request) File "/mma/users/benj/proj/rbtools/rbtools/api/request.py", line 827, in make_request request.url, body, headers, request.method)) File "/mma/users/benj/proj/rbtools/rbtools/api/cache.py", line 209, in make_request return self.urlopen(request) File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 223, in urlopen return opener.open(url, data, timeout) File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 526, in open response = self._open(req, data) File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 544, in _open '_open', req) File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 504, in _call_chain result = func(*args) File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 1346, in http_open return self.do_open(http.client.HTTPConnection, req) File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 1321, in do_open r = h.getresponse() File "/opt/rh/rh-python36/root/usr/lib64/python3.6/http/client.py", line 1331, in getresponse response.begin() File "/opt/rh/rh-python36/root/usr/lib64/python3.6/http/client.py", line 297, in begin version, status, reason = self._read_status() File "/opt/rh/rh-python36/root/usr/lib64/python3.6/http/client.py", line 266, in _read_status raise RemoteDisconnected("Remote end closed connection without" http.client.RemoteDisconnected: Remote end closed connection without response
Summary | ID |
---|---|
a95e1f9b2b80ec8610faa7fc6c13c62c9fcd9fcb |
Description | From | Last Updated |
---|---|---|
I'm a little concerned about the server response. That just shouldn't happen. Was that repeatable? I'd love to see the … |
chipx86 | |
execute() expects commands to be built as Unicode strings, for greatest compatibility across Python versions. I wanted to determine whether … |
chipx86 | |
The outer parens shouldn't be needed here, as % will take precedence and the parens of args will handle line … |
chipx86 |
-
-
execute()
expects commands to be built as Unicode strings, for greatest compatibility across Python versions. I wanted to determine whether it was safe to maybe acceptbytes
, so I dug into the calls being made. There are three cases where the command is used:-
subprocess.Popen()
, which does acceptbytes
on Python 2.7 and 3.6-3.10, but is documented to only accept strings. Unsure if this behavior may change in the future. -
subprocess.list2cmdline
, which does not acceptbytes
on Python 3.6 or 3.7. We call this, but we do force strings to be Unicode before passing them in. -
Logging and error handling, which returns a string representation (regardless of type).
I think ideally we'd accept
bytes
where possible, but we already do force toUnicode
forlist2cmdline
. Given that, and the documentation aroundPopen()
's string types, my feeling is that we should build this up as Unicode strings instead ofbytes
.You can pass the
base_path + ...
toforce_unicode()
to make a best-attempt at that. -
-
The outer parens shouldn't be needed here, as
%
will take precedence and the parens of args will handle line continuation.Same below.
- Change Summary:
-
Only pass unicode/str to self._exectue()
Don't use ((Extraneous) (extra) parentheses)Update test notes with some actual testing
- Testing Done:
-
This was a frequent error for us at work. I posted the solution to the issue tracker a long time ago, but I've since been unable to find the specific issue that was raised. I have been using this patch for at least 3 years.
+ + + + Add a file named
test-Œ.txt
with contentsŒŒŒŒ
.+ rbt post
that.+ + You can see inthe
--debug
output that the commands were successful:+ + + + >>> Running: p4 files //depot/MMA/core/main/test/test-Œ.txt@2399980
+ >>> Running: p4 files //depot/MMA/core/main/test/test.txt@2399980
+ + + + + I added
print()
statement and inspected the generated diff too:+ + + + Unicode: --- //depot/MMA/core/main/test/test-Œ.txt //depot/MMA/core/main/test/test-Œ.txt#1
+ +++ //depot/MMA/core/main/test/test-Œ.txt TIMESTAMP
+ @@ -0,0 +1 @@
+ +ŒŒŒŒ
+ --- //depot/MMA/core/main/test/test.txt //depot/MMA/core/main/test/test.txt#1
+ +++ //depot/MMA/core/main/test/test.txt TIMESTAMP
+ @@ -0,0 +1 @@
+ +ŒŒŒŒŒŒŒ
+ + b'Raw: --- //depot/MMA/core/main/test/test-\xc5\x92.txt\t//depot/MMA/core/main/test/test-\xc5\x92.txt#1\n+++ //depot/MMA/core/main/test/test-\xc5\x92.txt\tTIMESTAMP\n@@ -0,0 +1 @@\n+\xc5\x92\xc5\x92\xc5\x92\xc5\x92\n--- //depot/MMA/core/main/test/test.txt\t//depot/MMA/core/main/test/test.txt#1\n+++ //depot/MMA/core/main/test/test.txt\tTIMESTAMP\n@@ -0,0 +1 @@\n+\xc5\x92\xc5\x92\xc5\x92\xc5\x92\xc5\x92\xc5\x92\xc5\x92\n'
+ + + + + Funny enought he server version we're on just aborts the request, but i think the client behaved correctly:
+ + + + >>> Command line: rbt post --debug -r 93808 HEAD
+ >>> Running: git rev-parse HEAD
+ >>> Running: git rev-parse 48ecac22ee23eb524754fd08b1c651584104786c^
+ >>> Running: git branch --remotes
+ >>> Running: git rev-list 6e25de453e378d6f3a5927a5ed1d17b15a7517b9 --not --remotes=p4
+ >>> Making HTTP GET request to http://reviews.eu.fidessa.priv/api/review-requests/93808/?only-fields=absolute_url%2Cbugs_closed%2Cid%2Cstatus%2Cpublic&only-links=diffs%2Cdraft
+ >>> Cached response for HTTP GET http://reviews.eu.fidessa.priv/api/review-requests/93808/?only-fields=absolute_url%2Cbugs_closed%2Cid%2Cstatus%2Cpublic&only-links=diffs%2Cdraft expired and was not modified
+ >>> Running: git version
+ >>> Running: git -c core.quotepath=false diff --no-color --no-prefix -r -u --no-ext-diff 6e25de453e378d6f3a5927a5ed1d17b15a7517b9..48ecac22ee23eb524754fd08b1c651584104786c
+ >>> Running: git log 6e25de453e378d6f3a5927a5ed1d17b15a7517b9
+ >>> Running: p4 files //depot/MMA/core/main/test/test-Œ.txt@2399980
+ >>> Running: p4 files //depot/MMA/core/main/test/test.txt@2399980
+ >>> Making HTTP GET request to http://reviews.eu.fidessa.priv/api/validation/diffs/
+ >>> Cached response for HTTP GET http://reviews.eu.fidessa.priv/api/validation/diffs/ expired and was modified
+ >>> Making HTTP POST request to http://reviews.eu.fidessa.priv/api/validation/diffs/
+ Traceback (most recent call last):
+ File "/mma/users/benj/.local/bin/rbt", line 33, in <module>
+ sys.exit(load_entry_point('RBTools', 'console_scripts', 'rbt')())
+ File "/mma/users/benj/proj/rbtools/rbtools/commands/main.py", line 207, in main
+ command.run_from_argv([RB_MAIN, command_name] + args)
+ File "/mma/users/benj/proj/rbtools/rbtools/commands/__init__.py", line 1043, in run_from_argv
+ exit_code = self.main(*args) or 0
+ File "/mma/users/benj/proj/rbtools/rbtools/commands/post.py", line 1022, in main
+ self._validate_squashed_diff(squashed_diff)
+ File "/mma/users/benj/proj/rbtools/rbtools/commands/post.py", line 1383, in _validate_squashed_diff
+ **validate_kwargs)
+ File "/mma/users/benj/proj/rbtools/rbtools/api/decorators.py", line 27, in request_method
+ *args, **kwargs)
+ File "/mma/users/benj/proj/rbtools/rbtools/api/transport/sync.py", line 83, in execute_request_method
+ return self._execute_request(request)
+ File "/mma/users/benj/proj/rbtools/rbtools/api/transport/sync.py", line 92, in _execute_request
+ rsp = self.server.make_request(request)
+ File "/mma/users/benj/proj/rbtools/rbtools/api/request.py", line 827, in make_request
+ request.url, body, headers, request.method))
+ File "/mma/users/benj/proj/rbtools/rbtools/api/cache.py", line 209, in make_request
+ return self.urlopen(request)
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 223, in urlopen
+ return opener.open(url, data, timeout)
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 526, in open
+ response = self._open(req, data)
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 544, in _open
+ '_open', req)
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 504, in _call_chain
+ result = func(*args)
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 1346, in http_open
+ return self.do_open(http.client.HTTPConnection, req)
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/urllib/request.py", line 1321, in do_open
+ r = h.getresponse()
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/http/client.py", line 1331, in getresponse
+ response.begin()
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/http/client.py", line 297, in begin
+ version, status, reason = self._read_status()
+ File "/opt/rh/rh-python36/root/usr/lib64/python3.6/http/client.py", line 266, in _read_status
+ raise RemoteDisconnected("Remote end closed connection without"
+ http.client.RemoteDisconnected: Remote end closed connection without response
+ - Commits:
-
Summary ID ad9a05d4cd701360614ff2cedd5664707d1f84b0 a95e1f9b2b80ec8610faa7fc6c13c62c9fcd9fcb - Diff:
-
Revision 2 (+20 -10)