# Copyright (c) Opendatalab. All rights reserved.
import re
# One or more ASCII letters, then a hyphen, then optional trailing whitespace
# (including a newline) up to the end of the string.
_HYPHEN_AT_LINE_END_RE = re.compile(r'[A-Za-z]+-\s*$')


def is_hyphen_at_line_end(line: str) -> bool:
    """Check if a line ends with one or more letters followed by a hyphen.

    Trailing whitespace after the hyphen is ignored. Only ASCII letters
    (A-Z, a-z) count as letters, so a digit directly before the hyphen
    does not match.

    Args:
        line: The line of text to check.

    Returns:
        True if the line ends with one or more ASCII letters followed by a
        hyphen (optionally followed by whitespace), False otherwise.
    """
    return _HYPHEN_AT_LINE_END_RE.search(line) is not None
# Maps full-width digits (U+FF10-U+FF19), uppercase letters (U+FF21-U+FF3A)
# and lowercase letters (U+FF41-U+FF5A) to their ASCII counterparts, which
# sit exactly 0xFEE0 code points lower. Punctuation is intentionally absent.
_FULLWIDTH_ALNUM_TO_ASCII = {
    code: code - 0xFEE0
    for first, last in ((0xFF10, 0xFF19), (0xFF21, 0xFF3A), (0xFF41, 0xFF5A))
    for code in range(first, last + 1)
}


def full_to_half_exclude_marks(text: str) -> str:
    """Convert full-width letters and digits to half-width, leaving marks alone.

    Only full-width A-Z, a-z and 0-9 are converted. Full-width punctuation
    and every other character are returned unchanged.

    Args:
        text: String that may contain full-width characters.

    Returns:
        A new string with full-width letters and digits replaced by their
        ASCII equivalents.
    """
    # str.translate performs the per-character lookup in a single pass.
    return text.translate(_FULLWIDTH_ALNUM_TO_ASCII)
# Maps every code point in U+FF01-U+FF5E (full-width letters, digits and
# punctuation) to its ASCII counterpart, which sits exactly 0xFEE0 lower.
_FULLWIDTH_TO_ASCII = {code: code - 0xFEE0 for code in range(0xFF01, 0xFF5E + 1)}


def full_to_half(text: str) -> str:
    """Convert full-width letters, digits and punctuation to half-width.

    Every character in the range U+FF01-U+FF5E is shifted down to its ASCII
    equivalent (U+0021-U+007E). Characters outside that range, including
    the ideographic space U+3000, are returned unchanged.

    Args:
        text: String that may contain full-width characters.

    Returns:
        A new string with full-width characters replaced by their ASCII
        equivalents.
    """
    # str.translate performs the per-character lookup in a single pass.
    return text.translate(_FULLWIDTH_TO_ASCII)
|