Filter out strings that are short or that are outside the alphabet of a language
I wrote this function to remove from the array those words that are less than n values. More precisely, I updated my old function and added some functionality. Since I am a beginner in this programming language and so I could in my code allow for various illogical errors: Old version of my function:
$words = array("ӯтар","ӯро","ӯт","ғариб","афтода","даст", "ра", "I","Hello", "World"); // Simple array for using
function delValByLessOld($array)
{
return preg_grep('~A[^qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]{2,}z~u', $array);
}
Simple usage:
echo "<pre>";
print_r(delValByLessOld($words));
echo "</pre>";
Output:
Array
(
[0] => ӯтар
[1] => ӯро
[2] => ӯт
[3] => ғариб
[4] => афтода
[5] => даст
[6] => ра
)
My new (updated) function:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_string($lang))
{
$lang = mb_strtolower($lang);
$lang = str_replace($search, $replace, $lang);
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
if(is_string($lang))
{
if (isset($languages[$lang]))
{
return preg_grep('~A[^'.$languages[$lang].']{'.$less.',}z~u', $array);
}
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
}
return false;
}
Simple usage:
echo "<pre>";
print_r(delValByLess($words,'3', ['ENGLISH','TAJIK']));
echo "</pre>";
Output:
Array
(
[4] => афтода
[5] => даст
)
beginner php strings regex unicode
add a comment |
I wrote this function to remove from the array those words that are less than n values. More precisely, I updated my old function and added some functionality. Since I am a beginner in this programming language and so I could in my code allow for various illogical errors: Old version of my function:
$words = array("ӯтар","ӯро","ӯт","ғариб","афтода","даст", "ра", "I","Hello", "World"); // Simple array for using
function delValByLessOld($array)
{
return preg_grep('~A[^qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]{2,}z~u', $array);
}
Simple usage:
echo "<pre>";
print_r(delValByLessOld($words));
echo "</pre>";
Output:
Array
(
[0] => ӯтар
[1] => ӯро
[2] => ӯт
[3] => ғариб
[4] => афтода
[5] => даст
[6] => ра
)
My new (updated) function:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_string($lang))
{
$lang = mb_strtolower($lang);
$lang = str_replace($search, $replace, $lang);
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
if(is_string($lang))
{
if (isset($languages[$lang]))
{
return preg_grep('~A[^'.$languages[$lang].']{'.$less.',}z~u', $array);
}
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
}
return false;
}
Simple usage:
echo "<pre>";
print_r(delValByLess($words,'3', ['ENGLISH','TAJIK']));
echo "</pre>";
Output:
Array
(
[4] => афтода
[5] => даст
)
beginner php strings regex unicode
1
Which one of the two examples would you like to be peer reviewed? Both or only one? Please state it clearly... or maybe you wish a comparative-review?
– t3chb0t
Oct 20 '17 at 11:15
I wish a comparative-review @t3chb0t
– Andreas Hunter
Oct 20 '17 at 12:28
add a comment |
I wrote this function to remove from the array those words that are less than n values. More precisely, I updated my old function and added some functionality. Since I am a beginner in this programming language and so I could in my code allow for various illogical errors: Old version of my function:
$words = array("ӯтар","ӯро","ӯт","ғариб","афтода","даст", "ра", "I","Hello", "World"); // Simple array for using
function delValByLessOld($array)
{
return preg_grep('~A[^qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]{2,}z~u', $array);
}
Simple usage:
echo "<pre>";
print_r(delValByLessOld($words));
echo "</pre>";
Output:
Array
(
[0] => ӯтар
[1] => ӯро
[2] => ӯт
[3] => ғариб
[4] => афтода
[5] => даст
[6] => ра
)
My new (updated) function:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_string($lang))
{
$lang = mb_strtolower($lang);
$lang = str_replace($search, $replace, $lang);
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
if(is_string($lang))
{
if (isset($languages[$lang]))
{
return preg_grep('~A[^'.$languages[$lang].']{'.$less.',}z~u', $array);
}
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
}
return false;
}
Simple usage:
echo "<pre>";
print_r(delValByLess($words,'3', ['ENGLISH','TAJIK']));
echo "</pre>";
Output:
Array
(
[4] => афтода
[5] => даст
)
beginner php strings regex unicode
I wrote this function to remove from the array those words that are less than n values. More precisely, I updated my old function and added some functionality. Since I am a beginner in this programming language and so I could in my code allow for various illogical errors: Old version of my function:
$words = array("ӯтар","ӯро","ӯт","ғариб","афтода","даст", "ра", "I","Hello", "World"); // Simple array for using
function delValByLessOld($array)
{
return preg_grep('~A[^qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]{2,}z~u', $array);
}
Simple usage:
echo "<pre>";
print_r(delValByLessOld($words));
echo "</pre>";
Output:
Array
(
[0] => ӯтар
[1] => ӯро
[2] => ӯт
[3] => ғариб
[4] => афтода
[5] => даст
[6] => ра
)
My new (updated) function:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_string($lang))
{
$lang = mb_strtolower($lang);
$lang = str_replace($search, $replace, $lang);
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
if(is_string($lang))
{
if (isset($languages[$lang]))
{
return preg_grep('~A[^'.$languages[$lang].']{'.$less.',}z~u', $array);
}
}
if(is_array($lang))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
}
return false;
}
Simple usage:
echo "<pre>";
print_r(delValByLess($words,'3', ['ENGLISH','TAJIK']));
echo "</pre>";
Output:
Array
(
[4] => афтода
[5] => даст
)
beginner php strings regex unicode
beginner php strings regex unicode
edited Oct 19 '17 at 21:06
200_success
129k15152414
129k15152414
asked Oct 19 '17 at 16:04
Andreas HunterAndreas Hunter
171110
171110
1
Which one of the two examples would you like to be peer reviewed? Both or only one? Please state it clearly... or maybe you wish a comparative-review?
– t3chb0t
Oct 20 '17 at 11:15
I wish a comparative-review @t3chb0t
– Andreas Hunter
Oct 20 '17 at 12:28
add a comment |
1
Which one of the two examples would you like to be peer reviewed? Both or only one? Please state it clearly... or maybe you wish a comparative-review?
– t3chb0t
Oct 20 '17 at 11:15
I wish a comparative-review @t3chb0t
– Andreas Hunter
Oct 20 '17 at 12:28
1
1
Which one of the two examples would you like to be peer reviewed? Both or only one? Please state it clearly... or maybe you wish a comparative-review?
– t3chb0t
Oct 20 '17 at 11:15
Which one of the two examples would you like to be peer reviewed? Both or only one? Please state it clearly... or maybe you wish a comparative-review?
– t3chb0t
Oct 20 '17 at 11:15
I wish a comparative-review @t3chb0t
– Andreas Hunter
Oct 20 '17 at 12:28
I wish a comparative-review @t3chb0t
– Andreas Hunter
Oct 20 '17 at 12:28
add a comment |
3 Answers
3
active
oldest
votes
Disclaimer: this answer is intended only to point out the general principle of a possible simplification about array vs not array of an argument, and doesn't take care about real correctness and/or efficiency of the rest of the code (see the pertinent analysis proposed by @Pieter Witvoet).
When you write a function where you allow some arguments to be either arrays or simple values (regardless value is string or numeric), a pretty simple way to manage it is, for each involved $arg
:
if (!is_array($arg)) {
$arg = [$arg];
}
Applied to your entire code, it dramatically simplifies it:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_array($lang))
{
$lang = [$lang];
}
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
return false;
}
Thank you for answer and short of code my function! @cFreed
– Andreas Hunter
Oct 20 '17 at 8:48
add a comment |
It took me a while to decipher what you meant with 'less than n values'. What your function actually does: it filters out all words that are either shorter than the specified length (less
) or contain any character from the specified language(s).
- The function name is very undescriptive. In a small project that may not be an issue, but in larger projects it quickly becomes impossible to remember what every piece of code does. A good name allows you to quickly understand what a function does. A poor name often requires you to look at how it's implemented, which takes a lot more time. What about
removeShortAndForeignWords
? - The same goes for parameter and variable names. Good names make code easier to understand:
$array
->$words
,$less
->$minimumLength
,$lang
->$languageCode
,$languages
->$languageFilters
, and so on. - Why do you try to parse language names? That's very difficult to get right (what if I wanted to use
American English
, orру́сский
?), so the effect is that your function becomes less reliable, not easier to use. It's much more effective to just document which language codes your function supports (en
,ru
andtj
). In a statically typed language I'd use an enum, but here a set of constants should do.
$lang[$lan] = str_replace($search, $replace, $lan);
- Here, because you use$lan
as a key, you're adding entries. If the input wasarray('en', 'ru')
, you end up witharray('en', 'ru', 'en' => 'en', 'ru' => 'ru')
, which results in double work (the filters for each language are executed twice).
if(is_array($lang))
- Useelse if
here.
qwertyuiopasdfghjklzxcvbnm
matches a specific keyboard layout, butabcdefghijklmnopqrstuvwxyz
is a much more natural order for latin characters. However, since you're using a regex, you can simply usea-zA-Z
.- You can remove a lot of code duplication by making your function recursive. If
$lang
is an array, then your function can call itself (once for every language code in the array) and then return the final result. Then the rest of the code doesn't need to take multiple language codes into account anymore. - The return value behavior of your function is inconsistent. If you pass an invalid language code, it returns
false
. However, if you pass an array of invalid language codes, it returns the input words array. - Personally I'd be very careful with references in PHP (
&$array
). I've seen some surprising edge-cases and as far as I know they often degrade performance. In this case, your function is both returning a filtered array, and it's updating the original array (but only if you pass an array of language codes). So in your last example, not only does the output contain 2 russian words,$words
has also been replaced with the results array. That's a very surprising side-effect.
As per your request, here's how I would probably have written it. I don't often work with PHP, and I haven't thoroughly tested this, but it should give you a reasonable impression:
const LANG_EN = 'en';
const LANG_RU = 'ru';
const LANG_TJ = 'tj';
/**
* Takes an array of words and filters out any words that are too short or that contain any characters from the specified language(s).
*
* @param array $words The array of words to filter
* @param number $minimumLength The minimum allowed word length
* @param string|array $languageCode The language(s) that must be filtered out. The following languages are supported: LANG_EN, LANG_RU and LANG_TJ.
*
* @return array The given words, excluding words that are too short and words that are from the specified language(s).
*/
function removeShortAndForeignWords($words, $minimumLength = 2, $languageCode = LANG_EN) {
if (is_array($languageCode)) {
foreach ($languageCode as $language) {
$words = removeShortAndForeignWords($words, $minimumLength, $language);
}
return $words;
}
$languageFilters = [
LANG_EN => '[^a-zA-Z]',
LANG_RU => '[^ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ]',
LANG_TJ => '[^ғӯқҳҷӣҒӮҚҲҶӢ]',
];
if (!isset($languageFilters[$languageCode])) {
return false;
}
return preg_grep('~A'.$languageFilters[$languageCode].'{'.$minimumLength.',}z~u', $words);
}
Thank you for your advice dear Pieter Witvoet! As you mentioned above, the name of my functions is incorrectly specified so that its third-party developers understand the essence of the function and for what or for what solution it is intended. Since I am not an Englishman or because I do not understand English very well (this is my most important weak point), I make such mistakes. And because of this I ask such questions to improve my code. In the end, I want to say that you could not write a final (code) function in which you represented it in your descriptions and tips? @Pieter Witvoet
– Andreas Hunter
Oct 20 '17 at 3:36
@Otabek: coming up with good names is often difficult, no matter which language you speak. Anyway, I've added an example.
– Pieter Witvoet
Oct 20 '17 at 20:23
add a comment |
I'll try to itemize my review sequentially so that I don't miss anything.
Try to describe your custom function when naming it. The function is going to "filter the array" by one or more "languages" by a "minimum length". Perhaps
function keepByMinimumLengthAndLanguages()
. I don't know what I would settle on if this was my project. My suggestion risks being too wordy, but it does speak literally about its process.Stabilize your input types. If an incoming argument may be a string or an array, then just fix it to an array type to avoid having to cast the string as an array.
function keepByMinimumLengthAndLanguages(&$array, $min = 2, $langs = ['en'])
Now that your incoming
$langs
variable is expected to be an array... (If you cannot afford to replace other scripts that call this function, you can just write$langs = (array)$langs;
as the first line inside your custom function) ...you seem to require a bit of clean up on the language strings.
$language_whitelist = [
'english' => 'en',
'eng' => 'en',
'russian' => 'ru',
'rus' => 'ru',
'tajik' => 'tj',
'taj' => 'tj',
'tjk' => 'tj',
];
foreach ($langs as $lang) {
$lang_keys[$language_whitelist[$lang] ?? $lang] = null; // reduce to 2 chars or use original input
}
If the default
$min
value is2
, then I suppose you can build in a sanitizing step for this variable as well.if (!is_int($min) || $min < 1) { $min = 2; }
... or similar.
Now that you have the 2-character language keys, you can filter your array of language-specific character classes and use all character lists that qualify.
$language_letters =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ',
];
$array = preg_grep('~^[' . implode(array_intersect_key($language_letters, $lang_keys)) . ']{' . $min . ',}$~u', $array);
Notice that I am not using a negated character class (I removed the
^
).
Because you are modifying by reference, you don't need to
return
anything. I suppose if you didn't want to force valid default values in your function, you could flag invalid incoming data and eitherreturn true
when everything goes swimmingly, or return an error message. This way you can modify by reference and check for errors.
if (keepByMinimumLengthAndLanguages($array, 3, ['en']) === true) {
// all good, use $array down script
} else {
// display error
}
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f178302%2ffilter-out-strings-that-are-short-or-that-are-outside-the-alphabet-of-a-language%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
3 Answers
3
active
oldest
votes
3 Answers
3
active
oldest
votes
active
oldest
votes
active
oldest
votes
Disclaimer: this answer is intended only to point out the general principle of a possible simplification about array vs not array of an argument, and doesn't take care about real correctness and/or efficiency of the rest of the code (see the pertinent analysis proposed by @Pieter Witvoet).
When you write a function where you allow some arguments to be either arrays or simple values (regardless value is string or numeric), a pretty simple way to manage it is, for each involved $arg
:
if (!is_array($arg)) {
$arg = [$arg];
}
Applied to your entire code, it dramatically simplifies it:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_array($lang))
{
$lang = [$lang];
}
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
return false;
}
Thank you for answer and short of code my function! @cFreed
– Andreas Hunter
Oct 20 '17 at 8:48
add a comment |
Disclaimer: this answer is intended only to point out the general principle of a possible simplification about array vs not array of an argument, and doesn't take care about real correctness and/or efficiency of the rest of the code (see the pertinent analysis proposed by @Pieter Witvoet).
When you write a function where you allow some arguments to be either arrays or simple values (regardless value is string or numeric), a pretty simple way to manage it is, for each involved $arg
:
if (!is_array($arg)) {
$arg = [$arg];
}
Applied to your entire code, it dramatically simplifies it:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_array($lang))
{
$lang = [$lang];
}
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
return false;
}
Thank you for answer and short of code my function! @cFreed
– Andreas Hunter
Oct 20 '17 at 8:48
add a comment |
Disclaimer: this answer is intended only to point out the general principle of a possible simplification about array vs not array of an argument, and doesn't take care about real correctness and/or efficiency of the rest of the code (see the pertinent analysis proposed by @Pieter Witvoet).
When you write a function where you allow some arguments to be either arrays or simple values (regardless value is string or numeric), a pretty simple way to manage it is, for each involved $arg
:
if (!is_array($arg)) {
$arg = [$arg];
}
Applied to your entire code, it dramatically simplifies it:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_array($lang))
{
$lang = [$lang];
}
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
return false;
}
Disclaimer: this answer is intended only to point out the general principle of a possible simplification about array vs not array of an argument, and doesn't take care about real correctness and/or efficiency of the rest of the code (see the pertinent analysis proposed by @Pieter Witvoet).
When you write a function where you allow some arguments to be either arrays or simple values (regardless value is string or numeric), a pretty simple way to manage it is, for each involved $arg
:
if (!is_array($arg)) {
$arg = [$arg];
}
Applied to your entire code, it dramatically simplifies it:
function delValByLess(&$array, $less = 2, $lang = 'en')
{
$search = array('english','eng', 'russian', 'rus', 'tajik', 'taj', 'tjk');
$replace = array('en', 'en', 'ru', 'ru', 'tj', 'tj', 'tj');
if(is_array($lang))
{
$lang = [$lang];
}
foreach ($lang as $lan)
{
$lan = mb_strtolower($lan);
$lang[$lan] = str_replace($search, $replace, $lan);
}
$languages =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ'
];
if (is_array($array) && is_numeric($less))
{
foreach ($lang as $lan)
{
if (isset($languages[$lan]))
{
$array = preg_grep('~A[^'.$languages[$lan].']{'.$less.',}z~u', $array);
}
}
return $array;
}
return false;
}
answered Oct 20 '17 at 8:13
cFreedcFreed
2,463819
2,463819
Thank you for answer and short of code my function! @cFreed
– Andreas Hunter
Oct 20 '17 at 8:48
add a comment |
Thank you for answer and short of code my function! @cFreed
– Andreas Hunter
Oct 20 '17 at 8:48
Thank you for answer and short of code my function! @cFreed
– Andreas Hunter
Oct 20 '17 at 8:48
Thank you for answer and short of code my function! @cFreed
– Andreas Hunter
Oct 20 '17 at 8:48
add a comment |
It took me a while to decipher what you meant with 'less than n values'. What your function actually does: it filters out all words that are either shorter than the specified length (less
) or contain any character from the specified language(s).
- The function name is very undescriptive. In a small project that may not be an issue, but in larger projects it quickly becomes impossible to remember what every piece of code does. A good name allows you to quickly understand what a function does. A poor name often requires you to look at how it's implemented, which takes a lot more time. What about
removeShortAndForeignWords
? - The same goes for parameter and variable names. Good names make code easier to understand:
$array
->$words
,$less
->$minimumLength
,$lang
->$languageCode
,$languages
->$languageFilters
, and so on. - Why do you try to parse language names? That's very difficult to get right (what if I wanted to use
American English
, orру́сский
?), so the effect is that your function becomes less reliable, not easier to use. It's much more effective to just document which language codes your function supports (en
,ru
andtj
). In a statically typed language I'd use an enum, but here a set of constants should do.
$lang[$lan] = str_replace($search, $replace, $lan);
- Here, because you use$lan
as a key, you're adding entries. If the input wasarray('en', 'ru')
, you end up witharray('en', 'ru', 'en' => 'en', 'ru' => 'ru')
, which results in double work (the filters for each language are executed twice).
if(is_array($lang))
- Useelse if
here.
qwertyuiopasdfghjklzxcvbnm
matches a specific keyboard layout, butabcdefghijklmnopqrstuvwxyz
is a much more natural order for latin characters. However, since you're using a regex, you can simply usea-zA-Z
.- You can remove a lot of code duplication by making your function recursive. If
$lang
is an array, then your function can call itself (once for every language code in the array) and then return the final result. Then the rest of the code doesn't need to take multiple language codes into account anymore. - The return value behavior of your function is inconsistent. If you pass an invalid language code, it returns
false
. However, if you pass an array of invalid language codes, it returns the input words array. - Personally I'd be very careful with references in PHP (
&$array
). I've seen some surprising edge-cases and as far as I know they often degrade performance. In this case, your function is both returning a filtered array, and it's updating the original array (but only if you pass an array of language codes). So in your last example, not only does the output contain 2 russian words,$words
has also been replaced with the results array. That's a very surprising side-effect.
As per your request, here's how I would probably have written it. I don't often work with PHP, and I haven't thoroughly tested this, but it should give you a reasonable impression:
const LANG_EN = 'en';
const LANG_RU = 'ru';
const LANG_TJ = 'tj';
/**
* Takes an array of words and filters out any words that are too short or that contain any characters from the specified language(s).
*
* @param array $words The array of words to filter
* @param number $minimumLength The minimum allowed word length
* @param string|array $languageCode The language(s) that must be filtered out. The following languages are supported: LANG_EN, LANG_RU and LANG_TJ.
*
* @return array The given words, excluding words that are too short and words that are from the specified language(s).
*/
function removeShortAndForeignWords($words, $minimumLength = 2, $languageCode = LANG_EN) {
if (is_array($languageCode)) {
foreach ($languageCode as $language) {
$words = removeShortAndForeignWords($words, $minimumLength, $language);
}
return $words;
}
$languageFilters = [
LANG_EN => '[^a-zA-Z]',
LANG_RU => '[^ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ]',
LANG_TJ => '[^ғӯқҳҷӣҒӮҚҲҶӢ]',
];
if (!isset($languageFilters[$languageCode])) {
return false;
}
return preg_grep('~A'.$languageFilters[$languageCode].'{'.$minimumLength.',}z~u', $words);
}
Thank you for your advice dear Pieter Witvoet! As you mentioned above, the name of my functions is incorrectly specified so that its third-party developers understand the essence of the function and for what or for what solution it is intended. Since I am not an Englishman or because I do not understand English very well (this is my most important weak point), I make such mistakes. And because of this I ask such questions to improve my code. In the end, I want to say that you could not write a final (code) function in which you represented it in your descriptions and tips? @Pieter Witvoet
– Andreas Hunter
Oct 20 '17 at 3:36
@Otabek: coming up with good names is often difficult, no matter which language you speak. Anyway, I've added an example.
– Pieter Witvoet
Oct 20 '17 at 20:23
add a comment |
It took me a while to decipher what you meant with 'less than n values'. What your function actually does: it filters out all words that are either shorter than the specified length (less
) or contain any character from the specified language(s).
- The function name is very undescriptive. In a small project that may not be an issue, but in larger projects it quickly becomes impossible to remember what every piece of code does. A good name allows you to quickly understand what a function does. A poor name often requires you to look at how it's implemented, which takes a lot more time. What about
removeShortAndForeignWords
? - The same goes for parameter and variable names. Good names make code easier to understand:
$array
->$words
,$less
->$minimumLength
,$lang
->$languageCode
,$languages
->$languageFilters
, and so on. - Why do you try to parse language names? That's very difficult to get right (what if I wanted to use
American English
, orру́сский
?), so the effect is that your function becomes less reliable, not easier to use. It's much more effective to just document which language codes your function supports (en
,ru
andtj
). In a statically typed language I'd use an enum, but here a set of constants should do.
$lang[$lan] = str_replace($search, $replace, $lan);
- Here, because you use$lan
as a key, you're adding entries. If the input wasarray('en', 'ru')
, you end up witharray('en', 'ru', 'en' => 'en', 'ru' => 'ru')
, which results in double work (the filters for each language are executed twice).
if(is_array($lang))
- Useelse if
here.
qwertyuiopasdfghjklzxcvbnm
matches a specific keyboard layout, butabcdefghijklmnopqrstuvwxyz
is a much more natural order for latin characters. However, since you're using a regex, you can simply usea-zA-Z
.- You can remove a lot of code duplication by making your function recursive. If
$lang
is an array, then your function can call itself (once for every language code in the array) and then return the final result. Then the rest of the code doesn't need to take multiple language codes into account anymore. - The return value behavior of your function is inconsistent. If you pass an invalid language code, it returns
false
. However, if you pass an array of invalid language codes, it returns the input words array. - Personally I'd be very careful with references in PHP (
&$array
). I've seen some surprising edge-cases and as far as I know they often degrade performance. In this case, your function is both returning a filtered array, and it's updating the original array (but only if you pass an array of language codes). So in your last example, not only does the output contain 2 russian words,$words
has also been replaced with the results array. That's a very surprising side-effect.
As per your request, here's how I would probably have written it. I don't often work with PHP, and I haven't thoroughly tested this, but it should give you a reasonable impression:
const LANG_EN = 'en';
const LANG_RU = 'ru';
const LANG_TJ = 'tj';
/**
* Takes an array of words and filters out any words that are too short or that contain any characters from the specified language(s).
*
* @param array $words The array of words to filter
* @param number $minimumLength The minimum allowed word length
* @param string|array $languageCode The language(s) that must be filtered out. The following languages are supported: LANG_EN, LANG_RU and LANG_TJ.
*
* @return array The given words, excluding words that are too short and words that are from the specified language(s).
*/
function removeShortAndForeignWords($words, $minimumLength = 2, $languageCode = LANG_EN) {
if (is_array($languageCode)) {
foreach ($languageCode as $language) {
$words = removeShortAndForeignWords($words, $minimumLength, $language);
}
return $words;
}
$languageFilters = [
LANG_EN => '[^a-zA-Z]',
LANG_RU => '[^ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ]',
LANG_TJ => '[^ғӯқҳҷӣҒӮҚҲҶӢ]',
];
if (!isset($languageFilters[$languageCode])) {
return false;
}
return preg_grep('~A'.$languageFilters[$languageCode].'{'.$minimumLength.',}z~u', $words);
}
Thank you for your advice dear Pieter Witvoet! As you mentioned above, the name of my functions is incorrectly specified so that its third-party developers understand the essence of the function and for what or for what solution it is intended. Since I am not an Englishman or because I do not understand English very well (this is my most important weak point), I make such mistakes. And because of this I ask such questions to improve my code. In the end, I want to say that you could not write a final (code) function in which you represented it in your descriptions and tips? @Pieter Witvoet
– Andreas Hunter
Oct 20 '17 at 3:36
@Otabek: coming up with good names is often difficult, no matter which language you speak. Anyway, I've added an example.
– Pieter Witvoet
Oct 20 '17 at 20:23
add a comment |
It took me a while to decipher what you meant with 'less than n values'. What your function actually does: it filters out all words that are either shorter than the specified length (less
) or contain any character from the specified language(s).
- The function name is very undescriptive. In a small project that may not be an issue, but in larger projects it quickly becomes impossible to remember what every piece of code does. A good name allows you to quickly understand what a function does. A poor name often requires you to look at how it's implemented, which takes a lot more time. What about
removeShortAndForeignWords
? - The same goes for parameter and variable names. Good names make code easier to understand:
$array
->$words
,$less
->$minimumLength
,$lang
->$languageCode
,$languages
->$languageFilters
, and so on. - Why do you try to parse language names? That's very difficult to get right (what if I wanted to use
American English
, orру́сский
?), so the effect is that your function becomes less reliable, not easier to use. It's much more effective to just document which language codes your function supports (en
,ru
andtj
). In a statically typed language I'd use an enum, but here a set of constants should do.
$lang[$lan] = str_replace($search, $replace, $lan);
- Here, because you use$lan
as a key, you're adding entries. If the input wasarray('en', 'ru')
, you end up witharray('en', 'ru', 'en' => 'en', 'ru' => 'ru')
, which results in double work (the filters for each language are executed twice).
if(is_array($lang))
- Useelse if
here.
qwertyuiopasdfghjklzxcvbnm
matches a specific keyboard layout, butabcdefghijklmnopqrstuvwxyz
is a much more natural order for latin characters. However, since you're using a regex, you can simply usea-zA-Z
.- You can remove a lot of code duplication by making your function recursive. If
$lang
is an array, then your function can call itself (once for every language code in the array) and then return the final result. Then the rest of the code doesn't need to take multiple language codes into account anymore. - The return value behavior of your function is inconsistent. If you pass an invalid language code, it returns
false
. However, if you pass an array of invalid language codes, it returns the input words array. - Personally I'd be very careful with references in PHP (
&$array
). I've seen some surprising edge-cases and as far as I know they often degrade performance. In this case, your function is both returning a filtered array, and it's updating the original array (but only if you pass an array of language codes). So in your last example, not only does the output contain 2 russian words,$words
has also been replaced with the results array. That's a very surprising side-effect.
As per your request, here's how I would probably have written it. I don't often work with PHP, and I haven't thoroughly tested this, but it should give you a reasonable impression:
const LANG_EN = 'en';
const LANG_RU = 'ru';
const LANG_TJ = 'tj';
/**
* Takes an array of words and filters out any words that are too short or that contain any characters from the specified language(s).
*
* @param array $words The array of words to filter
* @param number $minimumLength The minimum allowed word length
* @param string|array $languageCode The language(s) that must be filtered out. The following languages are supported: LANG_EN, LANG_RU and LANG_TJ.
*
* @return array The given words, excluding words that are too short and words that are from the specified language(s).
*/
function removeShortAndForeignWords($words, $minimumLength = 2, $languageCode = LANG_EN) {
if (is_array($languageCode)) {
foreach ($languageCode as $language) {
$words = removeShortAndForeignWords($words, $minimumLength, $language);
}
return $words;
}
$languageFilters = [
LANG_EN => '[^a-zA-Z]',
LANG_RU => '[^ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ]',
LANG_TJ => '[^ғӯқҳҷӣҒӮҚҲҶӢ]',
];
if (!isset($languageFilters[$languageCode])) {
return false;
}
return preg_grep('~A'.$languageFilters[$languageCode].'{'.$minimumLength.',}z~u', $words);
}
It took me a while to decipher what you meant with 'less than n values'. What your function actually does: it filters out all words that are either shorter than the specified length (less
) or contain any character from the specified language(s).
- The function name is very undescriptive. In a small project that may not be an issue, but in larger projects it quickly becomes impossible to remember what every piece of code does. A good name allows you to quickly understand what a function does. A poor name often requires you to look at how it's implemented, which takes a lot more time. What about
removeShortAndForeignWords
? - The same goes for parameter and variable names. Good names make code easier to understand:
$array
->$words
,$less
->$minimumLength
,$lang
->$languageCode
,$languages
->$languageFilters
, and so on. - Why do you try to parse language names? That's very difficult to get right (what if I wanted to use
American English
, orру́сский
?), so the effect is that your function becomes less reliable, not easier to use. It's much more effective to just document which language codes your function supports (en
,ru
andtj
). In a statically typed language I'd use an enum, but here a set of constants should do.
$lang[$lan] = str_replace($search, $replace, $lan);
- Here, because you use$lan
as a key, you're adding entries. If the input wasarray('en', 'ru')
, you end up witharray('en', 'ru', 'en' => 'en', 'ru' => 'ru')
, which results in double work (the filters for each language are executed twice).
if(is_array($lang))
- Useelse if
here.
qwertyuiopasdfghjklzxcvbnm
matches a specific keyboard layout, butabcdefghijklmnopqrstuvwxyz
is a much more natural order for latin characters. However, since you're using a regex, you can simply usea-zA-Z
.- You can remove a lot of code duplication by making your function recursive. If
$lang
is an array, then your function can call itself (once for every language code in the array) and then return the final result. Then the rest of the code doesn't need to take multiple language codes into account anymore. - The return value behavior of your function is inconsistent. If you pass an invalid language code, it returns
false
. However, if you pass an array of invalid language codes, it returns the input words array. - Personally I'd be very careful with references in PHP (
&$array
). I've seen some surprising edge-cases and as far as I know they often degrade performance. In this case, your function is both returning a filtered array, and it's updating the original array (but only if you pass an array of language codes). So in your last example, not only does the output contain 2 russian words,$words
has also been replaced with the results array. That's a very surprising side-effect.
As per your request, here's how I would probably have written it. I don't often work with PHP, and I haven't thoroughly tested this, but it should give you a reasonable impression:
const LANG_EN = 'en';
const LANG_RU = 'ru';
const LANG_TJ = 'tj';
/**
* Takes an array of words and filters out any words that are too short or that contain any characters from the specified language(s).
*
* @param array $words The array of words to filter
* @param number $minimumLength The minimum allowed word length
* @param string|array $languageCode The language(s) that must be filtered out. The following languages are supported: LANG_EN, LANG_RU and LANG_TJ.
*
* @return array The given words, excluding words that are too short and words that are from the specified language(s).
*/
function removeShortAndForeignWords($words, $minimumLength = 2, $languageCode = LANG_EN) {
if (is_array($languageCode)) {
foreach ($languageCode as $language) {
$words = removeShortAndForeignWords($words, $minimumLength, $language);
}
return $words;
}
$languageFilters = [
LANG_EN => '[^a-zA-Z]',
LANG_RU => '[^ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ]',
LANG_TJ => '[^ғӯқҳҷӣҒӮҚҲҶӢ]',
];
if (!isset($languageFilters[$languageCode])) {
return false;
}
return preg_grep('~A'.$languageFilters[$languageCode].'{'.$minimumLength.',}z~u', $words);
}
edited Oct 20 '17 at 20:18
answered Oct 19 '17 at 19:44
Pieter WitvoetPieter Witvoet
5,801725
5,801725
Thank you for your advice dear Pieter Witvoet! As you mentioned above, the name of my functions is incorrectly specified so that its third-party developers understand the essence of the function and for what or for what solution it is intended. Since I am not an Englishman or because I do not understand English very well (this is my most important weak point), I make such mistakes. And because of this I ask such questions to improve my code. In the end, I want to say that you could not write a final (code) function in which you represented it in your descriptions and tips? @Pieter Witvoet
– Andreas Hunter
Oct 20 '17 at 3:36
@Otabek: coming up with good names is often difficult, no matter which language you speak. Anyway, I've added an example.
– Pieter Witvoet
Oct 20 '17 at 20:23
add a comment |
Thank you for your advice dear Pieter Witvoet! As you mentioned above, the name of my functions is incorrectly specified so that its third-party developers understand the essence of the function and for what or for what solution it is intended. Since I am not an Englishman or because I do not understand English very well (this is my most important weak point), I make such mistakes. And because of this I ask such questions to improve my code. In the end, I want to say that you could not write a final (code) function in which you represented it in your descriptions and tips? @Pieter Witvoet
– Andreas Hunter
Oct 20 '17 at 3:36
@Otabek: coming up with good names is often difficult, no matter which language you speak. Anyway, I've added an example.
– Pieter Witvoet
Oct 20 '17 at 20:23
Thank you for your advice dear Pieter Witvoet! As you mentioned above, the name of my functions is incorrectly specified so that its third-party developers understand the essence of the function and for what or for what solution it is intended. Since I am not an Englishman or because I do not understand English very well (this is my most important weak point), I make such mistakes. And because of this I ask such questions to improve my code. In the end, I want to say that you could not write a final (code) function in which you represented it in your descriptions and tips? @Pieter Witvoet
– Andreas Hunter
Oct 20 '17 at 3:36
Thank you for your advice dear Pieter Witvoet! As you mentioned above, the name of my functions is incorrectly specified so that its third-party developers understand the essence of the function and for what or for what solution it is intended. Since I am not an Englishman or because I do not understand English very well (this is my most important weak point), I make such mistakes. And because of this I ask such questions to improve my code. In the end, I want to say that you could not write a final (code) function in which you represented it in your descriptions and tips? @Pieter Witvoet
– Andreas Hunter
Oct 20 '17 at 3:36
@Otabek: coming up with good names is often difficult, no matter which language you speak. Anyway, I've added an example.
– Pieter Witvoet
Oct 20 '17 at 20:23
@Otabek: coming up with good names is often difficult, no matter which language you speak. Anyway, I've added an example.
– Pieter Witvoet
Oct 20 '17 at 20:23
add a comment |
I'll try to itemize my review sequentially so that I don't miss anything.
Try to describe your custom function when naming it. The function is going to "filter the array" by one or more "languages" by a "minimum length". Perhaps
function keepByMinimumLengthAndLanguages()
. I don't know what I would settle on if this was my project. My suggestion risks being too wordy, but it does speak literally about its process.Stabilize your input types. If an incoming argument may be a string or an array, then just fix it to an array type to avoid having to cast the string as an array.
function keepByMinimumLengthAndLanguages(&$array, $min = 2, $langs = ['en'])
Now that your incoming
$langs
variable is expected to be an array... (If you cannot afford to replace other scripts that call this function, you can just write$langs = (array)$langs;
as the first line inside your custom function) ...you seem to require a bit of clean up on the language strings.
$language_whitelist = [
'english' => 'en',
'eng' => 'en',
'russian' => 'ru',
'rus' => 'ru',
'tajik' => 'tj',
'taj' => 'tj',
'tjk' => 'tj',
];
foreach ($langs as $lang) {
$lang_keys[$language_whitelist[$lang] ?? $lang] = null; // reduce to 2 chars or use original input
}
If the default
$min
value is2
, then I suppose you can build in a sanitizing step for this variable as well.if (!is_int($min) || $min < 1) { $min = 2; }
... or similar.
Now that you have the 2-character language keys, you can filter your array of language-specific character classes and use all character lists that qualify.
$language_letters =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ',
];
$array = preg_grep('~^[' . implode(array_intersect_key($language_letters, $lang_keys)) . ']{' . $min . ',}$~u', $array);
Notice that I am not using a negated character class (I removed the
^
).
Because you are modifying by reference, you don't need to
return
anything. I suppose if you didn't want to force valid default values in your function, you could flag invalid incoming data and eitherreturn true
when everything goes swimmingly, or return an error message. This way you can modify by reference and check for errors.
if (keepByMinimumLengthAndLanguages($array, 3, ['en']) === true) {
// all good, use $array down script
} else {
// display error
}
add a comment |
I'll try to itemize my review sequentially so that I don't miss anything.
Try to describe your custom function when naming it. The function is going to "filter the array" by one or more "languages" by a "minimum length". Perhaps
function keepByMinimumLengthAndLanguages()
. I don't know what I would settle on if this was my project. My suggestion risks being too wordy, but it does speak literally about its process.Stabilize your input types. If an incoming argument may be a string or an array, then just fix it to an array type to avoid having to cast the string as an array.
function keepByMinimumLengthAndLanguages(&$array, $min = 2, $langs = ['en'])
Now that your incoming
$langs
variable is expected to be an array... (If you cannot afford to replace other scripts that call this function, you can just write$langs = (array)$langs;
as the first line inside your custom function) ...you seem to require a bit of clean up on the language strings.
$language_whitelist = [
'english' => 'en',
'eng' => 'en',
'russian' => 'ru',
'rus' => 'ru',
'tajik' => 'tj',
'taj' => 'tj',
'tjk' => 'tj',
];
foreach ($langs as $lang) {
$lang_keys[$language_whitelist[$lang] ?? $lang] = null; // reduce to 2 chars or use original input
}
If the default
$min
value is2
, then I suppose you can build in a sanitizing step for this variable as well.if (!is_int($min) || $min < 1) { $min = 2; }
... or similar.
Now that you have the 2-character language keys, you can filter your array of language-specific character classes and use all character lists that qualify.
$language_letters =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ',
];
$array = preg_grep('~^[' . implode(array_intersect_key($language_letters, $lang_keys)) . ']{' . $min . ',}$~u', $array);
Notice that I am not using a negated character class (I removed the
^
).
Because you are modifying by reference, you don't need to
return
anything. I suppose if you didn't want to force valid default values in your function, you could flag invalid incoming data and eitherreturn true
when everything goes swimmingly, or return an error message. This way you can modify by reference and check for errors.
if (keepByMinimumLengthAndLanguages($array, 3, ['en']) === true) {
// all good, use $array down script
} else {
// display error
}
add a comment |
I'll try to itemize my review sequentially so that I don't miss anything.
Try to describe your custom function when naming it. The function is going to "filter the array" by one or more "languages" by a "minimum length". Perhaps
function keepByMinimumLengthAndLanguages()
. I don't know what I would settle on if this was my project. My suggestion risks being too wordy, but it does speak literally about its process.Stabilize your input types. If an incoming argument may be a string or an array, then just fix it to an array type to avoid having to cast the string as an array.
function keepByMinimumLengthAndLanguages(&$array, $min = 2, $langs = ['en'])
Now that your incoming
$langs
variable is expected to be an array... (If you cannot afford to replace other scripts that call this function, you can just write$langs = (array)$langs;
as the first line inside your custom function) ...you seem to require a bit of clean up on the language strings.
$language_whitelist = [
'english' => 'en',
'eng' => 'en',
'russian' => 'ru',
'rus' => 'ru',
'tajik' => 'tj',
'taj' => 'tj',
'tjk' => 'tj',
];
foreach ($langs as $lang) {
$lang_keys[$language_whitelist[$lang] ?? $lang] = null; // reduce to 2 chars or use original input
}
If the default
$min
value is2
, then I suppose you can build in a sanitizing step for this variable as well.if (!is_int($min) || $min < 1) { $min = 2; }
... or similar.
Now that you have the 2-character language keys, you can filter your array of language-specific character classes and use all character lists that qualify.
$language_letters =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ',
];
$array = preg_grep('~^[' . implode(array_intersect_key($language_letters, $lang_keys)) . ']{' . $min . ',}$~u', $array);
Notice that I am not using a negated character class (I removed the
^
).
Because you are modifying by reference, you don't need to
return
anything. I suppose if you didn't want to force valid default values in your function, you could flag invalid incoming data and eitherreturn true
when everything goes swimmingly, or return an error message. This way you can modify by reference and check for errors.
if (keepByMinimumLengthAndLanguages($array, 3, ['en']) === true) {
// all good, use $array down script
} else {
// display error
}
I'll try to itemize my review sequentially so that I don't miss anything.
Try to describe your custom function when naming it. The function is going to "filter the array" by one or more "languages" by a "minimum length". Perhaps
function keepByMinimumLengthAndLanguages()
. I don't know what I would settle on if this was my project. My suggestion risks being too wordy, but it does speak literally about its process.Stabilize your input types. If an incoming argument may be a string or an array, then just fix it to an array type to avoid having to cast the string as an array.
function keepByMinimumLengthAndLanguages(&$array, $min = 2, $langs = ['en'])
Now that your incoming
$langs
variable is expected to be an array... (If you cannot afford to replace other scripts that call this function, you can just write$langs = (array)$langs;
as the first line inside your custom function) ...you seem to require a bit of clean up on the language strings.
$language_whitelist = [
'english' => 'en',
'eng' => 'en',
'russian' => 'ru',
'rus' => 'ru',
'tajik' => 'tj',
'taj' => 'tj',
'tjk' => 'tj',
];
foreach ($langs as $lang) {
$lang_keys[$language_whitelist[$lang] ?? $lang] = null; // reduce to 2 chars or use original input
}
If the default
$min
value is2
, then I suppose you can build in a sanitizing step for this variable as well.if (!is_int($min) || $min < 1) { $min = 2; }
... or similar.
Now that you have the 2-character language keys, you can filter your array of language-specific character classes and use all character lists that qualify.
$language_letters =
[
'en' => 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM',
'ru' => 'ёйцукенгшщзхъфывапролджэячсмитьбюЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ',
'tj' => 'ғӯқҳҷӣҒӮҚҲҶӢ',
];
$array = preg_grep('~^[' . implode(array_intersect_key($language_letters, $lang_keys)) . ']{' . $min . ',}$~u', $array);
Notice that I am not using a negated character class (I removed the
^
).
Because you are modifying by reference, you don't need to
return
anything. I suppose if you didn't want to force valid default values in your function, you could flag invalid incoming data and eitherreturn true
when everything goes swimmingly, or return an error message. This way you can modify by reference and check for errors.
if (keepByMinimumLengthAndLanguages($array, 3, ['en']) === true) {
// all good, use $array down script
} else {
// display error
}
answered 5 mins ago
mickmackusamickmackusa
1,057112
1,057112
add a comment |
add a comment |
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f178302%2ffilter-out-strings-that-are-short-or-that-are-outside-the-alphabet-of-a-language%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
1
Which one of the two examples would you like to be peer reviewed? Both or only one? Please state it clearly... or maybe you wish a comparative-review?
– t3chb0t
Oct 20 '17 at 11:15
I wish a comparative-review @t3chb0t
– Andreas Hunter
Oct 20 '17 at 12:28