C language - character functions and string functions

0. Preface

The processing of characters and strings in the C language is very frequent, but the C language itself does not have a string type, and the strings are usually placed in constant strings or character arrays.

String literals can be passed to string functions that do not modify their argument.

1. Function introduction

1.1 Find the length of the string

strlen

size_t strlen ( const char * str );
  • The string has '\0' as the end marker, and the strlen function returns the number of characters that appear before '\0' in the string (excluding '\0' )
  • The string pointed to by the parameter must end with '\0'
  • Note that the return value of the function is size_t, which is unsigned (error-prone)

Code:

#include <stdio.h>
#include <string.h>

/*
 * Demonstrates the unsigned-arithmetic pitfall of strlen().
 *
 * strlen() returns size_t, which is unsigned, so strlen(str2) - strlen(str1)
 * is evaluated in unsigned arithmetic: 3 - 6 wraps around to a very large
 * positive value instead of -3. The "> 0" test therefore succeeds and the
 * program prints "str2>str1" even though str2 is the shorter string.
 * To get the intended result, compare the lengths directly:
 * strlen(str2) > strlen(str1).
 */
int main()
{
	const char*str1 = "abcdef";
	const char*str2 = "bbb";
	/* Intentionally error-prone: an unsigned difference is never negative. */
	if(strlen(str2)-strlen(str1)>0)
	{
		printf("str2>str1\n");
	}
	else
	{
		printf("str1>str2\n");
	}
	return 0;
}

Mock implementation strlen

#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Returns the number of characters in str that precede the terminating
 * '\0' (the terminator itself is not counted).
 * str must not be NULL; this is checked with assert.
 */
size_t my_strlen(const char* str)
{
	assert(str);

	/* Walk a cursor to the terminator; the distance travelled is the length. */
	const char* end = str;
	while (*end)
	{
		end++;
	}
	return (size_t)(end - str);
}
/* Prints the length of "abcdef" as computed by my_strlen. */
int main()
{
	char arr[] = "abcdef";
	size_t n = my_strlen(arr);
	/* size_t must be printed with %zu; %u expects unsigned int and the
	   behavior is undefined when the argument type does not match. */
	printf("%zu\n", n);//6

	return 0;
}

1.2 String functions with unlimited length

strcpy

char* strcpy(char * destination, const char * source );
  • Copies the C string pointed by source into the array pointed by destination, including the terminating null character (and stopping at that point).
  • Source string must end with '\0'
  • will copy the '\0' in the source string to the destination space
  • The destination space must be large enough to hold the source string
  • The destination space must be modifiable

Code:

#include <stdio.h>
#include <string.h>

/*
 * strcpy demo: copies "abcdef" (including its terminator) into a 10-byte
 * buffer and prints the copy.
 */
int main()
{
	char source[] = "abcdef";
	char name[10] = "";

	/* strcpy returns its destination, so the result can be printed directly. */
	printf("%s\n", strcpy(name, source));

	return 0;
}

Mock implementation of strcpy

#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Copies the string src, terminating '\0' included, into dest and
 * returns dest.
 * Both pointers must be non-NULL (checked with assert). The caller must
 * make sure dest has room for the whole string.
 */
char* my_strcpy(char* dest, const char* src)
{
	assert(dest && src);

	size_t i = 0;
	do
	{
		/* Copy first, test afterwards, so the terminator is copied too. */
		dest[i] = src[i];
	} while (src[i++] != '\0');

	return dest;
}
/* Copies "abcdef" into an empty 20-byte buffer with my_strcpy and prints it. */
int main()
{
	char source[] = "abcdef";
	char target[20] = "";

	/* my_strcpy hands back the destination pointer. */
	char* copied = my_strcpy(target, source);
	printf("%s\n", copied);

	return 0;
}

strcat

char * strcat ( char * destination, const char * source );
  • Appends a copy of the source string to the destination string. The terminating null character in destination is overwritten by the first character of source, and a null-character is included at the end of the new string formed by the concatenation of both in destination.
  • Source string must end with '\0'
  • The destination space must be large enough to accommodate the content of the source string
  • The target space must be modifiable

Code:

#include <stdio.h>
#include <string.h>

/*
 * strcat demo: appends "world" to "hello " inside a 20-byte buffer and
 * prints the concatenated string.
 */
int main()
{
	char suffix[] = "world";
	char greeting[20] = "hello ";

	/* strcat returns its destination, so the result can be printed directly. */
	printf("%s\n", strcat(greeting, suffix));

	return 0;
}

Mock implementation strcat

#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Appends the string src to the end of the string in dest and returns dest.
 * The terminator of dest is overwritten by the first character of src and
 * a new terminator is written after the appended characters.
 * Both pointers must be non-NULL (checked with assert). The caller must
 * make sure dest has room for the combined string.
 */
char* my_strcat(char* dest, const char* src)
{
	assert(dest && src);

	/* Step 1: move to the '\0' that currently ends dest. */
	char* tail = dest;
	for (; *tail != '\0'; ++tail)
	{
	}

	/* Step 2: copy src there, terminator included. */
	for (;;)
	{
		*tail = *src;
		if (*src == '\0')
		{
			break;
		}
		++tail;
		++src;
	}

	return dest;
}
/* Appends "world" to "hello " with my_strcat and prints the result. */
int main()
{
	char suffix[] = "world";
	char greeting[20] = "hello ";

	/* my_strcat hands back the destination pointer. */
	char* joined = my_strcat(greeting, suffix);
	printf("%s\n", joined);

	return 0;
}

strcmp

int strcmp ( const char * str1, const char * str2 );
  • This function starts comparing the first character of each string. If they are equal to each other, it continues with the following pairs until the characters differ or until a terminating null-character is reached.
  • The standard specifies:
    1. If the first string is greater than the second string, return a number greater than 0
    2. If the first string is equal to the second string, return 0
    3. If the first string is less than the second string, return a number less than 0

Simulation implementation

#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Compares two strings character by character.
 *
 * Returns 0 when the strings are equal, a value less than 0 when str1
 * orders before str2, and a value greater than 0 when str1 orders after
 * str2. Both pointers must be non-NULL (checked with assert).
 */
int my_strcmp(const char* str1, const char* str2)
{
	assert(str1 && str2);
	while (*str1 == *str2)
	{
		if (*str1 == '\0')
			return 0;//equal
		str1++;
		str2++;
	}
	/* The standard strcmp orders characters as unsigned char. Subtracting
	   plain char values gives the wrong sign for bytes >= 0x80 on
	   platforms where char is signed. */
	return (unsigned char)*str1 - (unsigned char)*str2;
}
/*
 * Compares "zhangsan" with "zhangsanfeng" using my_strcmp and prints
 * "<", "==" or ">" according to the result.
 */
int main()
{
	char lhs[20] = "zhangsan";
	char rhs[] = "zhangsanfeng";

	// String contents are compared with a strcmp-style function; the sign of
	// the result tells the ordering.
	int order = my_strcmp(lhs, rhs);

	fputs(order < 0 ? "<\n" : (order == 0 ? "==\n" : ">\n"), stdout);

	return 0;
}

1.3 Introduction to String Functions with Limited Length

strncpy

char * strncpy ( char * destination, const char * source, size_t num );
  • Copies the first num characters of source to destination. If the end of the source C string (which is signaled by a null-character) is found before num characters have been copied, destination is padded with zeros until a total of num characters have been written to it.
  • Copy num characters from the source string to the destination space
  • If the length of the source string is less than num, after copying the source string, append 0 to the end of the target until num

Code:

#include <stdio.h>
#include <string.h>

/*
 * strncpy demo: copies up to 5 characters of "bit" over the start of a
 * buffer that holds "abcdef", then prints the buffer.
 */
int main()
{
	char source[] = "bit";
	char target[20] = "abcdef";

	/* The source is shorter than 5 characters, so strncpy pads the rest of
	   the 5 written bytes with '\0'. */
	strncpy(target, source, 5);
	printf("%s\n", target);

	return 0;
}

strncat

char * strncat ( char * destination, const char * source, size_t num );
  • Appends the first num characters of source to destination, plus a terminating null-character.
  • If the length of the C string in source is less than num, only the content up to the terminating null-character is copied.

Code:

#include <stdio.h>
#include <string.h>

/*
 * strncat demo: the destination holds "hello", a '\0', and then filler
 * 'x' characters; up to 6 characters of "bit" are appended and the result
 * is printed.
 */
int main()
{
	char source[] = "bit";
	char target[20] = "hello\0xxxxxx";

	/* Appending starts at the first '\0' in target, i.e. right after "hello". */
	strncat(target, source, 6);
	printf("%s\n", target);

	return 0;
}

strncmp

int strncmp ( const char * str1, const char * str2, size_t num );
  • Compare until another character is different or a string ends or all num characters are compared.

Code:

#include <stdio.h>
#include <string.h>

/*
 * strncmp demo: compares the first 4 characters of "abcdef" and "abcq",
 * prints the raw result, then prints "<", "==" or ">" according to its sign.
 */
int main()
{
	char lhs[] = "abcdef";
	char rhs[] = "abcq";

	int order = strncmp(lhs, rhs, 4);
	printf("%d\n", order);

	if (order < 0)
	{
		printf("<\n");
	}
	else if (order > 0)
	{
		printf(">\n");
	}
	else
	{
		printf("==\n");
	}

	return 0;
}

1.4 String lookup

strstr

char * strstr ( const char *str1, const char * str2);
  • Returns a pointer to the first occurrence of str2 in str1, or a null pointer if str2 is not part of
    str1.
#include <stdio.h>
#include <string.h>

/*
 * strstr demo: looks for "gitee" inside an e-mail address and prints the
 * text from the match onwards, or a message when there is no match.
 */
int main()
{
	char substr[] = "gitee";
	char email[] = "bye121345@gitee.com";

	char* match = strstr(email, substr);
	if (match != NULL)
	{
		/* Print from the first occurrence to the end of the string. */
		printf("%s\n", match);
		return 0;
	}

	printf("substring does not exist\n");
	return 0;
}

Mock implements strstr

#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Returns a pointer to the first occurrence of the string str2 inside the
 * string str1, or NULL when str2 does not occur in str1.
 * An empty str2 matches at the start of str1, as with the standard strstr.
 * Both pointers must be non-NULL (checked with assert).
 */
char* my_strstr(const char* str1, const char* str2)
{
	assert(str1 && str2);

	/* An empty needle always matches at the beginning, even when the
	   haystack is empty too (the scan loop below would never run then). */
	if (*str2 == '\0')
	{
		return (char*)str1;
	}

	/* Try every starting position p in the haystack. */
	for (const char* p = str1; *p != '\0'; p++)
	{
		const char* s1 = p;
		const char* s2 = str2;

		/* Advance while the needle has characters left and they match.
		   A '\0' in s1 cannot equal a non-'\0' in s2, so the scan never
		   runs past the end of the haystack. */
		while (*s2 != '\0' && *s1 == *s2)
		{
			s1++;
			s2++;
		}
		if (*s2 == '\0')
		{
			/* The whole needle matched starting at p. */
			return (char*)p;
		}
	}
	return NULL;
}
/*
 * Looks for "gitee" inside an e-mail address with my_strstr and prints the
 * text from the match onwards, or a message when there is no match.
 */
int main()
{
	char substr[] = "gitee";
	char email[] = "bye12345@gitee.com";

	char* match = my_strstr(email, substr);
	if (match != NULL)
	{
		/* Print from the first occurrence to the end of the string. */
		printf("%s\n", match);
	}
	else
	{
		printf("substring does not exist\n");
	}

	return 0;
}

strtok

char * strtok ( char * str, const char * sep );
  • The sep parameter is a string that defines the set of characters to be used as separators
  • The first parameter specifies a string containing 0 or more tokens separated by one or more delimiters in the sep string
  • The strtok function finds the next token in str, ends it with \0, and returns a pointer to this token. (Note: The strtok function will change the string being manipulated, so the string segmented by the strtok function is generally the content of a temporary copy and can be modified.)
  • If the first parameter of the strtok function is not NULL, the function finds the first token in str and saves its position in the string
  • If the first parameter of the strtok function is NULL, the function starts at the saved position in the same string and looks for the next token
  • Returns a NULL pointer if no more tokens exist in the string.

Code:

#include <stdio.h>
#include <string.h>

/*
 * strtok demo: splits a copy of an e-mail address at every '@' and '.'
 * and prints each token on its own line.
 */
int main()
{
	const char* sep = "@.";
	char email[] = "bye12345@gitee.com.net";
	char cp[40] = { 0 };

	/* strtok modifies the string it splits, so work on a copy. */
	strcpy(cp, email);

	/* The first call passes the string; later calls pass NULL to continue
	   from the saved position. */
	char* token = strtok(cp, sep);
	while (token != NULL)
	{
		printf("%s\n", token);
		token = strtok(NULL, sep);
	}

	return 0;
}

1.5 Error message reporting

strerror

char * strerror ( int errnum );
  • Returns a pointer to the error message string that corresponds to the error code errnum.

Code:

#include <stdio.h>
#include <string.h>
#include <errno.h>
//C language library functions will set an error code when the execution fails.
/*
 * strerror demo: prints the messages for error codes 0 through 5, then
 * tries to open a file and, on failure, prints the message for the error
 * code that fopen left in errno.
 * Returns 1 when the file cannot be opened, 0 otherwise.
 */
int main()
{
	for (int code = 0; code <= 5; code++)
	{
		printf("%s\n", strerror(code));
	}

	//errno - a global variable in which C library functions store the error code of a failed call

	FILE* pf = fopen("C:\\c-language\\c\\txt.txt", "r");
	if (pf == NULL)
	{
		/* Read errno right away, before another library call can overwrite it. */
		printf("%s\n", strerror(errno));
		return 1;
	}

	/* The file was opened only for the demonstration; close it so the
	   stream is not leaked. */
	fclose(pf);
	return 0;
}

character classification function

function | Returns true if its argument meets the following conditions
iscntrl | any control character
isspace | Whitespace characters: space ' ', form feed '\f', line feed '\n', carriage return '\r', tab '\t' or vertical tab '\v'
isdigit | Decimal digits 0~9
isxdigit | Hexadecimal digits, including all decimal digits, lowercase a~f, uppercase A~F
islower | lowercase letters a~z
isupper | Capital letters A~Z
isalpha | letters a~z or A~Z
isalnum | Letters or numbers: a~z, A~Z, 0~9
ispunct | Punctuation marks, any graphic characters that are not numbers or letters (printable)
isgraph | any graphic character
isprint | Any printable character, including graphic characters and the space character
#include <stdio.h>
#include <ctype.h>

/*
 * Character-function demo: prints the results of isspace(' ') and
 * isdigit('x'), then the character returned by tolower('@').
 */
int main()
{
   /* The classification functions return non-zero for "true" and 0 for "false". */
   printf("%d\n", isspace(' '));
   printf("%d\n", isdigit('x'));

   printf("%c\n", tolower('@'));
   return 0;
}

1.6 Memory operation functions

memcpy

void * memcpy ( void * destination, const void * source, size_t num );
  • The function memcpy copies num bytes of data, starting at the location of source and moving forward, to the memory location of destination
  • This function does not stop when it encounters '\0'
  • If there is any overlap between source and destination, the result of the copy is undefined.

Code:

#include <stdio.h>
#include <string.h>

/*
 * memcpy demo: copies the whole 5-element float array arr3 into the first
 * elements of the zero-initialised 10-element array arr4.
 */
int main()
{
	float arr3[5] = { 1.0f,2.5f,3.0f,5.0f,6.0f };
	float arr4[10] = { 0.0f };
	/* Let the compiler compute the byte count instead of hard-coding 20,
	   which is only right when sizeof(float) is 4. */
	memcpy(arr4, arr3, sizeof arr3);

	return 0;
}

Simulate the implementation of memcpy

#include <stdio.h>
#include <string.h>
#include <assert.h>

/*
 * Copies num bytes from src to dest, byte by byte from the lowest address
 * upwards, and returns dest. The copy does not stop at '\0'.
 * Both pointers must be non-NULL (checked with assert). Because of the
 * front-to-back order, a dest that overlaps src at a higher address gets
 * corrupted data.
 */
void* my_memcpy(void* dest, const void* src, size_t num)
{
	assert(dest && src);

	unsigned char* out = dest;
	const unsigned char* in = src;

	while (num--)
	{
		*out++ = *in++;
	}

	/* Hand back the original destination, as the standard memcpy does. */
	return dest;
}
/*
 * Copies the first 28 bytes of a 10-element int array into a
 * zero-initialised 10-element int array using my_memcpy.
 */
int main()
{
	int source[] = { 1,2,3,4,5,6,7,8,9,10 };
	int target[10] = { 0 };

	/* 28 bytes is 7 elements when int is 4 bytes wide. */
	my_memcpy(target, source, 28);

	return 0;
}

memmove

void * memmove ( void * destination, const void * source, size_t num );
  • The difference from memcpy is that the source memory block and the target memory block processed by the memmove function can overlap.
  • If the source space and the target space overlap, you have to use the memmove function to deal with it.

memcmp

int memcmp ( const void * ptr1,
			const void * ptr2,
			size_t num );
  • Compare num bytes from ptr1 and ptr2 pointers
  • The return value is as follows

Returns an integral value indicating the relationship between the content of the memory blocks:

(returns an integer value representing the relationship between the contents of the memory block):

return value | indicates
<0 | the first byte that does not match in both memory blocks has a lower value in ptr1 than in ptr2 (if evaluated as unsigned char values)
0 | the contents of both memory blocks are equal
>0 | the first byte that does not match in both memory blocks has a greater value in ptr1 than in ptr2 (if evaluated as unsigned char values)
#include <stdio.h>
#include <string.h>

/*
 * memcmp demo: compares the first 12 bytes of two int arrays and prints
 * the result.
 */
int main()
{
	int lhs[] = { 1,2,3,4,5 };
	int rhs[] = { 1,3,2 };

	/* memcmp compares raw bytes, not int values. */
	printf("%d\n", memcmp(lhs, rhs, 12));

	return 0;
}

This blog code is for reference only, if there is any error, please correct me

Tags: C C++ data structure

Posted by Roggan on Sat, 22 Oct 2022 23:11:08 +0530