Go 正则匹配之跨行匹配

2023-12-29 15:35:13

跨行匹配

使用 `(?s)` 标志让 `.` 也能匹配换行符,从而实现跨行匹配(注意:`(?s)` 是单行/dotall 模式;真正的多行模式是 `(?m)`,它只改变 `^` 和 `$` 的含义)。

package main

import (
	"fmt"
	"regexp"
)

// main shows that the (?s) flag lets "." match a newline, so one pattern can
// span both lines of the input.
func main() {
	const text = "This is the first line. \nAnd this is the second line."

	// Without (?s) the pattern `line.*And` finds nothing, because "." stops
	// at "\n" by default.
	crossLine := regexp.MustCompile(`(?s)line.*And`)

	// The matched text itself contains the line break.
	fmt.Println(crossLine.FindString(text))
	// The whole matched span, line break included, is replaced by dashes.
	fmt.Println(crossLine.ReplaceAllString(text, "------"))
}
line. 
And
This is the first ------ this is the second line.

go 正则匹配相关的其他常用函数

Compile、MustCompile

同样的功能,不同的设计:

  1. Compile函数基于错误处理设计,将正则表达式编译成有效的可匹配格式,适用于用户输入场景。当用户输入的正则表达式不合法时,该函数会返回一个错误。
  2. MustCompile函数基于异常处理设计,适用于硬编码场景。当调用者明确知道输入不会引起函数错误时,要求调用者检查这个错误是不必要和累赘的。我们应该假设函数的输入一直合法,当调用者输入了不应该出现的输入时,就触发panic异常。

其实直接从 MustCompile 的实现可以看出,MustCompile 本质上是调用的 Compile ,如果表达式编译失败,直接 panic ,而 Compile 则会把 err 返回,由用户决定是否 panic 或进行其他处理:

// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
//
// str is the regular-expression source text. When Compile succeeds, its
// *Regexp result is returned unchanged. When Compile reports an error, the
// function panics with a message built from the quoted expression and the
// error text, so no error value is ever returned to the caller.
func MustCompile(str string) *Regexp {
	regexp, err := Compile(str)
	if err != nil {
		panic(`regexp: Compile(` + quote(str) + `): ` + err.Error())
	}
	return regexp
}

FindString

FindString 用来返回匹配到的第一个字符串。

package main

import (
	"fmt"
	"regexp"
)

// main prints the first match of the pattern in the input. Because "." does
// not cross the newline, the match stays inside the first line.
func main() {
	const text = "This is the first line. \nAnd this is the second line."

	first := regexp.MustCompile(`the .* line`).FindString(text)
	fmt.Println(first) // the first line
}

FindAllString

FindAllString 用来返回匹配到的所有的字符串。用户可以通过第二个参数指定最多返回多少个匹配结果,传入负数表示不限制个数。

package main

import (
	"fmt"
	"regexp"
)

// main collects at most three matches of the pattern and prints them as a
// Go-syntax string slice.
func main() {
	const (
		text  = "This is the first line. \nAnd this is the second line."
		limit = 3 // upper bound on the number of matches returned
	)

	pattern := regexp.MustCompile(`the .* line`)
	found := pattern.FindAllString(text, limit)

	// []string{"the first line", "the second line"}
	fmt.Printf("%#v\n", found)
}

Find

类似于FindString,只不过以字节数组的形式表示。

package main

import (
	"fmt"
	"regexp"
)

// main finds the first match in a byte slice and prints it twice: once as raw
// bytes and once converted to a string.
func main() {
	input := []byte("This is the first line. \nAnd this is the second line.")
	pattern := regexp.MustCompile(`the .* line`)

	first := pattern.Find(input)

	// []byte{0x74, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65}
	fmt.Printf("%#v\n", first)

	// "the first line"
	fmt.Printf("%#v\n", string(first))
}

FindAll

package main

import (
	"fmt"
	"regexp"
)

// main collects at most three matches from a byte slice and prints them as a
// slice of byte slices.
func main() {
	input := []byte("This is the first line. \nAnd this is the second line.")
	pattern := regexp.MustCompile(`the .* line`)

	const limit = 3 // upper bound on the number of matches returned
	all := pattern.FindAll(input, limit)

	// [][]uint8{[]uint8{0x74, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65},
	//           []uint8{0x74, 0x68, 0x65, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x20, 0x6c, 0x69, 0x6e, 0x65}}
	fmt.Printf("%#v\n", all)
}

FindIndex

返回匹配字符串的起始位置和结束位置索引,未匹配到的话返回 nil

package main

import (
	"fmt"
	"regexp"
)

// main looks up the byte offsets of the first match, prints them, and then
// uses them to slice the matched text back out of the input.
func main() {
	const text = "This is the first line. \nAnd this is the second line."
	pattern := regexp.MustCompile(`the .* line`)

	loc := pattern.FindIndex([]byte(text))
	// []int{8, 22}
	fmt.Printf("%#v\n", loc)

	// A nil result means the pattern did not match, so there is nothing to slice.
	if loc == nil {
		fmt.Println("match is nil")
		return
	}

	start, end := loc[0], loc[1]
	// "the first line"
	fmt.Printf("%#v\n", text[start:end])
}

临时插入一个小知识:声明一个未初始化的 Slice 或 Map,其零值为 nil。

package main

import (
	"fmt"
)

// main contrasts a declared-but-uninitialized slice and map, which are nil,
// with their empty-literal counterparts, which are not.
func main() {
	var nilSlice []int
	emptySlice := []int{}

	var nilMap map[string]string
	emptyMap := map[string]string{}

	// []int(nil), true
	fmt.Printf("%#v\n", nilSlice)
	fmt.Println(nilSlice == nil)

	// []int{}, false
	fmt.Printf("%#v\n", emptySlice)
	fmt.Println(emptySlice == nil)

	// map[string]string(nil), true
	fmt.Printf("%#v\n", nilMap)
	fmt.Println(nilMap == nil)

	// map[string]string{}, false
	fmt.Printf("%#v\n", emptyMap)
	fmt.Println(emptyMap == nil)
}

FindAllIndex

package main

import (
	"fmt"
	"regexp"
)

// main looks up the byte offsets of at most three matches, prints them, and
// then slices each matched text back out of the input.
func main() {
	const text = "This is the first line. \nAnd this is the second line."
	pattern := regexp.MustCompile(`the .* line`)

	spans := pattern.FindAllIndex([]byte(text), 3)
	// [][]int{[]int{8, 22}, []int{37, 52}}
	fmt.Printf("%#v\n", spans)

	// A nil result means the pattern did not match anywhere.
	if spans == nil {
		fmt.Println("match is nil")
		return
	}

	// "the first line", then "the second line"
	for i := range spans {
		lo, hi := spans[i][0], spans[i][1]
		fmt.Printf("%#v\n", text[lo:hi])
	}
}

FindSubmatch

有些例子比较简单,就不多描述了。需要注意的是:下面几个 Submatch 示例中的正则没有捕获分组,所以返回结果里只有整体匹配这一项;如果正则中包含 `(...)` 分组,各分组匹配到的内容会依次排在整体匹配之后。

package main

import (
	"fmt"
	"regexp"
)

// main runs FindSubmatch on a byte slice and prints the result. The pattern
// has no capture groups, so the result holds only the whole match.
func main() {
	input := []byte("This is the first line. \nAnd this is the second line.")

	groups := regexp.MustCompile(`the .* line`).FindSubmatch(input)

	// [][]uint8{[]uint8{0x74, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65}}
	fmt.Printf("%#v\n", groups)
}

FindAllSubmatch

package main

import (
	"fmt"
	"regexp"
)

// main runs FindAllSubmatch on a byte slice with a limit of three matches and
// prints the result. The pattern has no capture groups, so each entry holds
// only the whole match.
func main() {
	input := []byte("This is the first line. \nAnd this is the second line.")
	pattern := regexp.MustCompile(`the .* line`)

	const limit = 3 // upper bound on the number of matches returned
	allGroups := pattern.FindAllSubmatch(input, limit)

	// [][][]uint8{[][]uint8{[]uint8{0x74, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65}},
	//             [][]uint8{[]uint8{0x74, 0x68, 0x65, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x20, 0x6c, 0x69, 0x6e, 0x65}}}
	fmt.Printf("%#v\n", allGroups)
}

FindStringSubmatch

package main

import (
	"fmt"
	"regexp"
)

// main runs FindStringSubmatch on the input and prints the result. The
// pattern has no capture groups, so the slice holds only the whole match.
func main() {
	const text = "This is the first line. \nAnd this is the second line."

	groups := regexp.MustCompile(`the .* line`).FindStringSubmatch(text)

	// []string{"the first line"}
	fmt.Printf("%#v\n", groups)
}

FindAllStringSubmatch

package main

import (
	"fmt"
	"regexp"
)

// main runs FindAllStringSubmatch with a limit of three matches and prints
// the result. The pattern has no capture groups, so each inner slice holds
// only the whole match.
func main() {
	const (
		text  = "This is the first line. \nAnd this is the second line."
		limit = 3 // upper bound on the number of matches returned
	)

	pattern := regexp.MustCompile(`the .* line`)
	allGroups := pattern.FindAllStringSubmatch(text, limit)

	// [][]string{[]string{"the first line"}, []string{"the second line"}}
	fmt.Printf("%#v\n", allGroups)
}

ReplaceAllString

将所有匹配到的字符串使用给定字符串进行替换。替换的字符可以引用匹配组的内容。

package main

import (
	"fmt"
	"regexp"
)

// main replaces every match with the text captured by the first group: in the
// replacement template, "$1" expands to that group's content.
func main() {
	const text = "This is the first line. \nAnd this is the second line."

	pattern := regexp.MustCompile(`the (.*) line`)
	replaced := pattern.ReplaceAllString(text, "$1")

	// "This is first. \nAnd this is second."
	fmt.Printf("%#v\n", replaced)
}

ReplaceAllLiteralString

替换的字符串被当作字符串字面量进行处理。

package main

import (
	"fmt"
	"regexp"
)

// main replaces every match with the literal text "$1". Unlike
// ReplaceAllString, the replacement is not expanded, so "$1" appears verbatim
// in the output.
func main() {
	const text = "This is the first line. \nAnd this is the second line."

	pattern := regexp.MustCompile(`the (.*) line`)
	replaced := pattern.ReplaceAllLiteralString(text, "$1")

	// "This is $1. \nAnd this is $1."
	fmt.Printf("%#v\n", replaced)
}

ReplaceAll

package main

import (
	"fmt"
	"regexp"
)

// main replaces every match in a byte slice with a fixed replacement and
// prints the result twice: once as raw bytes and once converted to a string.
func main() {
	input := []byte("This is the first line. \nAnd this is the second line.")
	replacement := []byte("---replace string---")

	replaced := regexp.MustCompile(`the .* line`).ReplaceAll(input, replacement)

	// []byte{0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x2d, 0x2d, 0x2d, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x2d, 0x2d, 0x2e, 0x20, 0xa, 0x41, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x2d, 0x2d, 0x2d, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x2d, 0x2d, 0x2e}
	fmt.Printf("%#v\n", replaced)

	// "This is ---replace string---. \nAnd this is ---replace string---."
	fmt.Printf("%#v\n", string(replaced))
}

文章来源:https://blog.csdn.net/TomorrowAndTuture/article/details/135274884
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。