Published: 2018-09-09

Python3和golang中bytes与string的转换

本文是最近对于python和golang中的bytes与string互相转换的总结与记录。

Table of Contents

1 Bytes 创建

# Python
my_bytes = bytes([0x06, 0xe5, 0x33, 0xfd, 0x1a, 0xda, 0x86, 0x39,
                  0x1f, 0x3f, 0x6c, 0x34, 0x32, 0x04, 0xb0, 0xd2,
                  0x78, 0xd4, 0xaa, 0xec, 0x1c, 0x0b, 0x20, 0xaa,
                  0x27, 0xba, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00])


// Golang:(忽略变量命名规范)
my_bytes := []byte{0x79, 0xa6, 0x1a, 0xdb, 0xc6, 0xe5, 0xa2, 0xe1,
                   0x39, 0xd2, 0x71, 0x3a, 0x54, 0x6e, 0xc7, 0xc8,
                   0x75, 0x63, 0x2e, 0x75, 0xf1, 0xdf, 0x9c, 0x3f,
                   0xa6, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}

2 Bytes -> String

# Python:
my_bytes = bytes([104, 101, 108, 108, 111])
my_bytes.decode()  # 默认utf-8


// Golang
my_bytes := []byte{104, 101, 108, 108, 111}
// A slice converts to a string directly; slicing with [:] first is only
// needed when the source is an array rather than a slice.
my_string := string(my_bytes)


3 String -> Bytes

# Python
# 方法1
my_string = "hello"
my_string.encode()

# 方法2
my_string = "hello"
bytes(my_string, encoding='utf8')


# 方法3
b'hello'  # 字面量创建



// Golang
// Declare with := (plain = only compiles for variables that already exist).
my_string := "hello"
my_bytes := []byte(my_string) // Go string literals are UTF-8 encoded


4 Bytes -> Hex string

# Python
my_bytes = bytes([0x06, 0xe5, 0x33, 0xfd, 0x1a, 0xda, 0x86, 0x39,
                  0x1f, 0x3f, 0x6c, 0x34, 0x32, 0x04, 0xb0, 0xd2,
                  0x78, 0xd4, 0xaa, 0xec, 0x1c, 0x0b, 0x20, 0xaa,
                  0x27, 0xba, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00])



# 返回一个字符串,两个十六进制的字符表示一个byte(Return a string object containing two hexadecimal digits for each byte in the instance)
my_hex = my_bytes.hex()


// Golang

my_bytes := []byte{0x79, 0xa6, 0x1a, 0xdb, 0xc6, 0xe5, 0xa2, 0xe1,
                   0x39, 0xd2, 0x71, 0x3a, 0x54, 0x6e, 0xc7, 0xc8,
                   0x75, 0x63, 0x2e, 0x75, 0xf1, 0xdf, 0x9c, 0x3f,
                   0xa6, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}

my_hex := hex.EncodeToString(my_bytes) // returns the hexadecimal encoding as a string (import "encoding/hex")


5 Hex string -> Bytes

# Python:
my_bytes = bytes.fromhex(my_hex)

//  Golang:
// hex.DecodeString (import "encoding/hex") returns ([]byte, error), so both
// results must be received.
my_bytes, err := hex.DecodeString(my_hex)
if err != nil {
	// my_hex was not a valid hexadecimal string.
	panic(err)
}



6 Other

6.1 小心字符串长度

对于字符串 "Hello, 世界" 它的长度是多少呢?

这取决于什么算是一个长度。

Golang的len文档如下:

func len(v Type) int

The len built-in function returns the length of v, according to its type:

Array: the number of elements in v.
Pointer to array: the number of elements in *v (even if v is nil).
Slice, or map: the number of elements in v; if v is nil, len(v) is zero.
String: the number of bytes in v.                                -> 注意,如果是字符串,那么值是bytes的数量
Channel: the number of elements queued (unread) in the channel buffer;
if v is nil, len(v) is zero.

package main

import (
        "fmt"
)

// main prints the length of the same text as a string and as a byte slice,
// followed by the byte slice itself.
func main() {
	const greeting = "Hello, 世界"
	raw := []byte(greeting)

	fmt.Println(len(greeting)) // len of a string is its number of bytes
	fmt.Println(len(raw))
	fmt.Println(raw)
}

输出:
13
13
[72 101 108 108 111 44 32 228 184 150 231 149 140]

而Python中的len文档是这样描述的:

Return the length (the number of items) of an object. The argument may be a sequence (such as a string, bytes, tuple, list, or range) or a collection (such as a dictionary, set, or frozen set).

它返回的是number of items:对于str,item是Unicode字符(码点),而不是字节;对于bytes,item才是字节。

In [1]: print(len("Hello, 世界"))
9

In [2]: print(len("Hello, 世界".encode()))
13

In [4]: for x in "Hello, 世界".encode():
   ...:     print(x, end=" ")
   ...:
72 101 108 108 111 44 32 228 184 150 231 149 140

In [5]: result = []

In [6]: for x in "Hello, 世界".encode():
   ...:     result.append(x)

In [7]: x
Out[7]: 140

In [8]: result
Out[8]: [72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]

In [9]: bytes(result)
Out[9]: b'Hello, \xe4\xb8\x96\xe7\x95\x8c'

在golang里,对于字符串,如果要像python一样返回"item"的数量(go里叫runes),可以像下面这样:

  1. utf8.RuneCountInString
package main

import (
        "fmt"
        "unicode/utf8"
)

// main prints the byte length of the text, then its rune count, then the
// length and contents of its byte-slice form.
func main() {
	const greeting = "Hello, 世界"
	raw := []byte(greeting)

	fmt.Println(len(greeting))
	fmt.Println(utf8.RuneCountInString(greeting)) // -> here: counts runes, not bytes
	fmt.Println(len(raw))
	fmt.Println(raw)
}

输出:
13
9  -> 这里变成了9
13
[72 101 108 108 111 44 32 228 184 150 231 149 140]
  2. 用rune
package main

import (
        "fmt"
)

// main prints the byte view of the text first and, after a separator line,
// its rune view, so the two lengths can be compared.
func main() {
	const greeting = "Hello, 世界"
	raw := []byte(greeting)
	runes := []rune(greeting)

	fmt.Println(len(greeting))
	fmt.Println(len(raw))
	fmt.Println(raw)
	fmt.Println("--seprator--")
	fmt.Println(runes)
	fmt.Println(len(runes)) // -> here: one element per rune, not per byte
}

输出:
13
13
[72 101 108 108 111 44 32 228 184 150 231 149 140]
--seprator--
[72 101 108 108 111 44 32 19990 30028]
9  -> 这里

7 END

Author: Nisen

Email: imnisen@163.com